Command line
python /home/saxelrod/Repo/projects/chemprop/chemprop/train.py --config_path /home/saxelrod/synthetic/energy_ffn_fixed/train/config.json --data_path /home/saxelrod/synthetic/energy/train_full.csv --dataset_type regression
Args
{'activation': 'ReLU',
 'aggregation': 'mean',
 'aggregation_norm': 100,
 'atom_descriptors': None,
 'atom_descriptors_path': None,
 'atom_descriptors_size': 0,
 'atom_features_size': 0,
 'atom_messages': False,
 'batch_size': 50,
 'bias': False,
 'cache_cutoff': 10000,
 'checkpoint_dir': None,
 'checkpoint_path': None,
 'checkpoint_paths': None,
 'class_balance': False,
 'config_path': '/home/saxelrod/synthetic/energy_ffn_fixed/train/config.json',
 'crossval_index_dir': None,
 'crossval_index_file': None,
 'crossval_index_sets': None,
 'cuda': True,
 'data_path': '/home/saxelrod/synthetic/energy/train_full.csv',
 'dataset_type': 'regression',
 'depth': 4,
 'device': device(type='cuda', index=1),
 'dropout': 0.0,
 'ensemble_size': 1,
 'epochs': 30,
 'extra_metrics': [],
 'features_generator': ['morgan'],
 'features_only': True,
 'features_path': None,
 'features_scaling': False,
 'features_size': None,
 'ffn_hidden_size': 2100,
 'ffn_num_layers': 3,
 'final_lr': 0.0001,
 'folds_file': None,
 'gpu': 1,
 'grad_clip': None,
 'hidden_size': 2100,
 'ignore_columns': None,
 'init_lr': 0.0001,
 'log_frequency': 10,
 'max_data_size': None,
 'max_lr': 0.001,
 'metric': 'mae',
 'metrics': ['mae'],
 'minimize_score': True,
 'mpn_shared': False,
 'multiclass_num_classes': 3,
 'no_cache_mol': False,
 'no_cuda': False,
 'no_features_scaling': True,
 'num_folds': 10,
 'num_lrs': 1,
 'num_tasks': 1,
 'num_workers': 8,
 'number_of_molecules': 1,
 'pytorch_seed': 0,
 'quiet': True,
 'save_dir': '/home/saxelrod/synthetic/energy_ffn_fixed/train',
 'save_preds': False,
 'save_smiles_splits': False,
 'seed': 0,
 'separate_test_features_path': None,
 'separate_test_path': '/home/saxelrod/synthetic/energy/test_full.csv',
 'separate_val_features_path': None,
 'separate_val_path': '/home/saxelrod/synthetic/energy/val_full.csv',
 'show_individual_scores': False,
 'smiles_columns': [None],
 'split_sizes': (0.8, 0.1, 0.1),
 'split_type': 'random',
 'target_columns': None,
 'task_names': ['ensembleenergy'],
 'test': False,
 'test_fold_index': None,
 'train_data_size': None,
 'undirected': False,
 'use_input_features': True,
 'val_fold_index': None,
 'warmup_epochs': 2.0}
Loading data
Number of tasks = 1
Fold 0
Splitting data with seed 0
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN()
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=2048, out_features=2100, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=2100, out_features=2100, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.0, inplace=False)
    (7): Linear(in_features=2100, out_features=1, bias=True)
  )
)
Number of parameters = 8,717,101
Moving model to cuda
Epoch 0
Loss = 8.3928e-01, PNorm = 64.6279, GNorm = 2.7650, lr_0 = 1.0413e-04
Loss = 8.6525e-01, PNorm = 64.6385, GNorm = 3.6388, lr_0 = 1.0788e-04
Loss = 8.1868e-01, PNorm = 64.6499, GNorm = 2.5154, lr_0 = 1.1163e-04
Loss = 6.5826e-01, PNorm = 64.6600, GNorm = 2.7988, lr_0 = 1.1537e-04
Loss = 7.3127e-01, PNorm = 64.6705, GNorm = 2.6605, lr_0 = 1.1913e-04
Loss = 7.2952e-01, PNorm = 64.6804, GNorm = 2.2434, lr_0 = 1.2287e-04
Loss = 6.4294e-01, PNorm = 64.6895, GNorm = 3.0382, lr_0 = 1.2663e-04
Loss = 6.1005e-01, PNorm = 64.6983, GNorm = 3.8269, lr_0 = 1.3038e-04
Loss = 6.5945e-01, PNorm = 64.7083, GNorm = 2.1559, lr_0 = 1.3413e-04
Loss = 6.5236e-01, PNorm = 64.7185, GNorm = 1.9061, lr_0 = 1.3788e-04
Loss = 6.3588e-01, PNorm = 64.7293, GNorm = 2.5658, lr_0 = 1.4163e-04
Loss = 6.3836e-01, PNorm = 64.7408, GNorm = 3.2265, lr_0 = 1.4537e-04
Loss = 5.7845e-01, PNorm = 64.7523, GNorm = 2.1334, lr_0 = 1.4913e-04
Loss = 6.1899e-01, PNorm = 64.7627, GNorm = 2.9787, lr_0 = 1.5288e-04
Loss = 6.2219e-01, PNorm = 64.7737, GNorm = 3.0114, lr_0 = 1.5662e-04
Loss = 5.9574e-01, PNorm = 64.7871, GNorm = 3.0755, lr_0 = 1.6038e-04
Loss = 5.6880e-01, PNorm = 64.7980, GNorm = 1.9938, lr_0 = 1.6412e-04
Loss = 6.3410e-01, PNorm = 64.8099, GNorm = 2.5709, lr_0 = 1.6788e-04
Loss = 5.4344e-01, PNorm = 64.8222, GNorm = 2.7932, lr_0 = 1.7163e-04
Loss = 6.2470e-01, PNorm = 64.8348, GNorm = 2.1730, lr_0 = 1.7538e-04
Loss = 5.6777e-01, PNorm = 64.8503, GNorm = 3.4433, lr_0 = 1.7913e-04
Loss = 5.8065e-01, PNorm = 64.8654, GNorm = 2.7603, lr_0 = 1.8288e-04
Loss = 5.9586e-01, PNorm = 64.8796, GNorm = 3.0158, lr_0 = 1.8662e-04
Loss = 6.0143e-01, PNorm = 64.8953, GNorm = 2.8599, lr_0 = 1.9038e-04
Loss = 6.0157e-01, PNorm = 64.9110, GNorm = 1.8499, lr_0 = 1.9413e-04
Loss = 5.8366e-01, PNorm = 64.9281, GNorm = 2.6849, lr_0 = 1.9788e-04
Loss = 6.9315e-01, PNorm = 64.9482, GNorm = 2.3081, lr_0 = 2.0163e-04
Loss = 5.9502e-01, PNorm = 64.9653, GNorm = 2.5908, lr_0 = 2.0537e-04
Loss = 6.3427e-01, PNorm = 64.9808, GNorm = 1.8352, lr_0 = 2.0913e-04
Loss = 5.6418e-01, PNorm = 64.9971, GNorm = 3.2902, lr_0 = 2.1288e-04
Loss = 5.2236e-01, PNorm = 65.0147, GNorm = 1.6478, lr_0 = 2.1663e-04
Loss = 5.8231e-01, PNorm = 65.0322, GNorm = 2.3071, lr_0 = 2.2038e-04
Loss = 6.7431e-01, PNorm = 65.0505, GNorm = 1.9475, lr_0 = 2.2412e-04
Loss = 5.4283e-01, PNorm = 65.0686, GNorm = 4.1439, lr_0 = 2.2787e-04
Loss = 6.0022e-01, PNorm = 65.0896, GNorm = 2.2284, lr_0 = 2.3163e-04
Loss = 5.2097e-01, PNorm = 65.1123, GNorm = 1.6186, lr_0 = 2.3538e-04
Loss = 5.1187e-01, PNorm = 65.1302, GNorm = 1.8823, lr_0 = 2.3913e-04
Loss = 5.3521e-01, PNorm = 65.1480, GNorm = 2.0111, lr_0 = 2.4288e-04
Loss = 6.2335e-01, PNorm = 65.1679, GNorm = 1.5020, lr_0 = 2.4662e-04
Loss = 6.0875e-01, PNorm = 65.1901, GNorm = 2.3691, lr_0 = 2.5038e-04
Loss = 6.4568e-01, PNorm = 65.2125, GNorm = 2.3726, lr_0 = 2.5413e-04
Loss = 6.8413e-01, PNorm = 65.2389, GNorm = 1.7324, lr_0 = 2.5788e-04
Loss = 5.2247e-01, PNorm = 65.2641, GNorm = 1.4116, lr_0 = 2.6163e-04
Loss = 5.5013e-01, PNorm = 65.2882, GNorm = 1.5365, lr_0 = 2.6537e-04
Loss = 4.8385e-01, PNorm = 65.3108, GNorm = 1.8173, lr_0 = 2.6912e-04
Loss = 5.8561e-01, PNorm = 65.3319, GNorm = 3.1052, lr_0 = 2.7288e-04
Loss = 5.9582e-01, PNorm = 65.3563, GNorm = 1.5721, lr_0 = 2.7663e-04
Loss = 5.1869e-01, PNorm = 65.3828, GNorm = 1.5999, lr_0 = 2.8038e-04
Loss = 5.5473e-01, PNorm = 65.4048, GNorm = 1.7331, lr_0 = 2.8413e-04
Loss = 4.5645e-01, PNorm = 65.4325, GNorm = 1.3811, lr_0 = 2.8787e-04
Loss = 4.9618e-01, PNorm = 65.4592, GNorm = 1.5170, lr_0 = 2.9163e-04
Loss = 5.3260e-01, PNorm = 65.4823, GNorm = 1.6926, lr_0 = 2.9538e-04
Loss = 5.3381e-01, PNorm = 65.5072, GNorm = 1.5384, lr_0 = 2.9913e-04
Loss = 5.5276e-01, PNorm = 65.5360, GNorm = 1.5339, lr_0 = 3.0288e-04
Loss = 5.9453e-01, PNorm = 65.5681, GNorm = 1.6789, lr_0 = 3.0662e-04
Loss = 5.2022e-01, PNorm = 65.5973, GNorm = 2.0454, lr_0 = 3.1037e-04
Loss = 5.3935e-01, PNorm = 65.6304, GNorm = 1.4860, lr_0 = 3.1413e-04
Loss = 5.6846e-01, PNorm = 65.6655, GNorm = 2.0020, lr_0 = 3.1788e-04
Loss = 5.2147e-01, PNorm = 65.6934, GNorm = 1.6656, lr_0 = 3.2163e-04
Loss = 5.1052e-01, PNorm = 65.7213, GNorm = 1.7309, lr_0 = 3.2538e-04
Loss = 4.9519e-01, PNorm = 65.7529, GNorm = 1.3007, lr_0 = 3.2912e-04
Loss = 5.4902e-01, PNorm = 65.7847, GNorm = 1.5760, lr_0 = 3.3288e-04
Loss = 4.6798e-01, PNorm = 65.8167, GNorm = 1.4245, lr_0 = 3.3663e-04
Loss = 6.3879e-01, PNorm = 65.8498, GNorm = 1.5523, lr_0 = 3.4038e-04
Loss = 5.9755e-01, PNorm = 65.8861, GNorm = 1.5469, lr_0 = 3.4413e-04
Loss = 6.0244e-01, PNorm = 65.9243, GNorm = 1.4338, lr_0 = 3.4787e-04
Loss = 5.0620e-01, PNorm = 65.9620, GNorm = 1.9813, lr_0 = 3.5162e-04
Loss = 4.8186e-01, PNorm = 65.9930, GNorm = 1.3638, lr_0 = 3.5538e-04
Loss = 5.7279e-01, PNorm = 66.0312, GNorm = 1.4572, lr_0 = 3.5913e-04
Loss = 5.3578e-01, PNorm = 66.0660, GNorm = 1.4305, lr_0 = 3.6288e-04
Loss = 5.0653e-01, PNorm = 66.1040, GNorm = 1.1229, lr_0 = 3.6662e-04
Loss = 5.1752e-01, PNorm = 66.1431, GNorm = 1.6346, lr_0 = 3.7037e-04
Loss = 5.3643e-01, PNorm = 66.1779, GNorm = 1.1381, lr_0 = 3.7413e-04
Loss = 4.8939e-01, PNorm = 66.2177, GNorm = 1.3340, lr_0 = 3.7788e-04
Loss = 5.1923e-01, PNorm = 66.2524, GNorm = 1.2296, lr_0 = 3.8163e-04
Loss = 4.7058e-01, PNorm = 66.2909, GNorm = 1.1934, lr_0 = 3.8537e-04
Loss = 5.4334e-01, PNorm = 66.3315, GNorm = 1.6299, lr_0 = 3.8912e-04
Loss = 5.3120e-01, PNorm = 66.3745, GNorm = 1.5720, lr_0 = 3.9287e-04
Loss = 5.2576e-01, PNorm = 66.4241, GNorm = 1.5486, lr_0 = 3.9663e-04
Loss = 6.5626e-01, PNorm = 66.4774, GNorm = 2.1100, lr_0 = 4.0038e-04
Loss = 4.7891e-01, PNorm = 66.5257, GNorm = 1.1001, lr_0 = 4.0413e-04
Loss = 5.2471e-01, PNorm = 66.5711, GNorm = 1.3079, lr_0 = 4.0787e-04
Loss = 4.8974e-01, PNorm = 66.6211, GNorm = 1.5089, lr_0 = 4.1162e-04
Loss = 5.0368e-01, PNorm = 66.6585, GNorm = 1.3516, lr_0 = 4.1537e-04
Loss = 5.5569e-01, PNorm = 66.7076, GNorm = 1.2353, lr_0 = 4.1913e-04
Loss = 5.0309e-01, PNorm = 66.7603, GNorm = 1.1101, lr_0 = 4.2288e-04
Loss = 5.2441e-01, PNorm = 66.8052, GNorm = 1.4027, lr_0 = 4.2662e-04
Loss = 5.7199e-01, PNorm = 66.8496, GNorm = 1.2356, lr_0 = 4.3037e-04
Loss = 5.1378e-01, PNorm = 66.8963, GNorm = 1.2768, lr_0 = 4.3412e-04
Loss = 5.6852e-01, PNorm = 66.9402, GNorm = 1.6618, lr_0 = 4.3788e-04
Loss = 4.9151e-01, PNorm = 66.9907, GNorm = 1.4376, lr_0 = 4.4163e-04
Loss = 5.0485e-01, PNorm = 67.0357, GNorm = 1.6137, lr_0 = 4.4538e-04
Loss = 5.0308e-01, PNorm = 67.0809, GNorm = 1.1855, lr_0 = 4.4912e-04
Loss = 4.4376e-01, PNorm = 67.1274, GNorm = 1.9211, lr_0 = 4.5287e-04
Loss = 5.4354e-01, PNorm = 67.1726, GNorm = 1.4366, lr_0 = 4.5662e-04
Loss = 5.1180e-01, PNorm = 67.2268, GNorm = 1.5324, lr_0 = 4.6038e-04
Loss = 4.9414e-01, PNorm = 67.2786, GNorm = 1.7339, lr_0 = 4.6413e-04
Loss = 4.9138e-01, PNorm = 67.3335, GNorm = 2.0907, lr_0 = 4.6787e-04
Loss = 4.7896e-01, PNorm = 67.3908, GNorm = 1.6045, lr_0 = 4.7162e-04
Loss = 4.6909e-01, PNorm = 67.4450, GNorm = 1.5212, lr_0 = 4.7537e-04
Loss = 5.1240e-01, PNorm = 67.4967, GNorm = 1.6568, lr_0 = 4.7913e-04
Loss = 5.1452e-01, PNorm = 67.5530, GNorm = 1.4169, lr_0 = 4.8288e-04
Loss = 5.1658e-01, PNorm = 67.6058, GNorm = 1.1231, lr_0 = 4.8663e-04
Loss = 5.1892e-01, PNorm = 67.6614, GNorm = 1.6836, lr_0 = 4.9038e-04
Loss = 5.5979e-01, PNorm = 67.7157, GNorm = 1.2014, lr_0 = 4.9412e-04
Loss = 4.3381e-01, PNorm = 67.7729, GNorm = 1.2475, lr_0 = 4.9788e-04
Loss = 5.7121e-01, PNorm = 67.8242, GNorm = 1.2169, lr_0 = 5.0163e-04
Loss = 5.5018e-01, PNorm = 67.8887, GNorm = 1.8692, lr_0 = 5.0538e-04
Loss = 4.9152e-01, PNorm = 67.9511, GNorm = 1.4464, lr_0 = 5.0913e-04
Loss = 5.2630e-01, PNorm = 68.0208, GNorm = 1.2574, lr_0 = 5.1287e-04
Loss = 5.4803e-01, PNorm = 68.0823, GNorm = 1.6084, lr_0 = 5.1663e-04
Loss = 4.6855e-01, PNorm = 68.1542, GNorm = 0.7730, lr_0 = 5.2038e-04
Loss = 4.8889e-01, PNorm = 68.2181, GNorm = 1.4090, lr_0 = 5.2413e-04
Loss = 5.3249e-01, PNorm = 68.2866, GNorm = 1.0282, lr_0 = 5.2788e-04
Loss = 4.8656e-01, PNorm = 68.3499, GNorm = 1.4120, lr_0 = 5.3162e-04
Loss = 4.7648e-01, PNorm = 68.4051, GNorm = 0.9739, lr_0 = 5.3538e-04
Loss = 5.6293e-01, PNorm = 68.4656, GNorm = 1.5681, lr_0 = 5.3912e-04
Loss = 4.2642e-01, PNorm = 68.5285, GNorm = 1.2956, lr_0 = 5.4288e-04
Loss = 5.1756e-01, PNorm = 68.5789, GNorm = 1.2376, lr_0 = 5.4663e-04
Loss = 4.1580e-01, PNorm = 68.6415, GNorm = 0.9823, lr_0 = 5.5038e-04
Validation mae = 0.128839
Epoch 1
Loss = 3.9214e-01, PNorm = 68.7053, GNorm = 1.3498, lr_0 = 5.5413e-04
Loss = 4.1797e-01, PNorm = 68.7727, GNorm = 1.0841, lr_0 = 5.5787e-04
Loss = 3.4467e-01, PNorm = 68.8423, GNorm = 0.9342, lr_0 = 5.6163e-04
Loss = 3.8651e-01, PNorm = 68.9228, GNorm = 1.2716, lr_0 = 5.6538e-04
Loss = 4.2930e-01, PNorm = 69.0064, GNorm = 1.3212, lr_0 = 5.6913e-04
Loss = 3.7605e-01, PNorm = 69.0940, GNorm = 1.0672, lr_0 = 5.7288e-04
Loss = 3.3416e-01, PNorm = 69.1787, GNorm = 1.0460, lr_0 = 5.7662e-04
Loss = 3.9392e-01, PNorm = 69.2667, GNorm = 1.6532, lr_0 = 5.8038e-04
Loss = 3.6181e-01, PNorm = 69.3512, GNorm = 1.0494, lr_0 = 5.8413e-04
Loss = 3.6719e-01, PNorm = 69.4381, GNorm = 1.3283, lr_0 = 5.8788e-04
Loss = 3.9374e-01, PNorm = 69.5313, GNorm = 1.0096, lr_0 = 5.9163e-04
Loss = 3.6689e-01, PNorm = 69.6274, GNorm = 0.9841, lr_0 = 5.9538e-04
Loss = 4.5112e-01, PNorm = 69.7360, GNorm = 1.5601, lr_0 = 5.9913e-04
Loss = 3.7723e-01, PNorm = 69.8461, GNorm = 1.2864, lr_0 = 6.0288e-04
Loss = 3.1423e-01, PNorm = 69.9455, GNorm = 1.1553, lr_0 = 6.0663e-04
Loss = 3.6530e-01, PNorm = 70.0440, GNorm = 0.9990, lr_0 = 6.1038e-04
Loss = 4.2048e-01, PNorm = 70.1345, GNorm = 1.1095, lr_0 = 6.1413e-04
Loss = 3.5797e-01, PNorm = 70.2444, GNorm = 1.2522, lr_0 = 6.1788e-04
Loss = 3.2260e-01, PNorm = 70.3477, GNorm = 0.8466, lr_0 = 6.2163e-04
Loss = 3.6030e-01, PNorm = 70.4536, GNorm = 1.6875, lr_0 = 6.2538e-04
Loss = 4.1503e-01, PNorm = 70.5545, GNorm = 1.2956, lr_0 = 6.2913e-04
Loss = 4.1779e-01, PNorm = 70.6831, GNorm = 1.3099, lr_0 = 6.3288e-04
Loss = 4.0357e-01, PNorm = 70.7883, GNorm = 1.8761, lr_0 = 6.3663e-04
Loss = 4.1782e-01, PNorm = 70.9129, GNorm = 1.3085, lr_0 = 6.4038e-04
Loss = 4.0409e-01, PNorm = 71.0241, GNorm = 1.2916, lr_0 = 6.4413e-04
Loss = 3.9369e-01, PNorm = 71.1367, GNorm = 1.4880, lr_0 = 6.4788e-04
Loss = 4.0672e-01, PNorm = 71.2560, GNorm = 1.1384, lr_0 = 6.5163e-04
Loss = 4.2202e-01, PNorm = 71.3761, GNorm = 0.9547, lr_0 = 6.5538e-04
Loss = 4.4023e-01, PNorm = 71.5051, GNorm = 1.4150, lr_0 = 6.5913e-04
Loss = 4.0085e-01, PNorm = 71.6295, GNorm = 1.3749, lr_0 = 6.6288e-04
Loss = 4.2889e-01, PNorm = 71.7544, GNorm = 1.2832, lr_0 = 6.6663e-04
Loss = 3.4812e-01, PNorm = 71.8810, GNorm = 1.4653, lr_0 = 6.7038e-04
Loss = 4.3153e-01, PNorm = 72.0034, GNorm = 1.2419, lr_0 = 6.7413e-04
Loss = 4.1718e-01, PNorm = 72.1340, GNorm = 1.8995, lr_0 = 6.7788e-04
Loss = 3.5021e-01, PNorm = 72.2574, GNorm = 1.2704, lr_0 = 6.8163e-04
Loss = 4.1445e-01, PNorm = 72.3822, GNorm = 1.1873, lr_0 = 6.8538e-04
Loss = 4.3032e-01, PNorm = 72.4999, GNorm = 2.2291, lr_0 = 6.8913e-04
Loss = 4.1487e-01, PNorm = 72.6276, GNorm = 1.5662, lr_0 = 6.9288e-04
Loss = 3.9436e-01, PNorm = 72.7680, GNorm = 1.1005, lr_0 = 6.9663e-04
Loss = 4.0788e-01, PNorm = 72.9111, GNorm = 1.3056, lr_0 = 7.0038e-04
Loss = 4.0393e-01, PNorm = 73.0374, GNorm = 1.3480, lr_0 = 7.0413e-04
Loss = 4.7084e-01, PNorm = 73.1718, GNorm = 1.5136, lr_0 = 7.0788e-04
Loss = 4.5187e-01, PNorm = 73.3087, GNorm = 0.8550, lr_0 = 7.1163e-04
Loss = 4.3913e-01, PNorm = 73.4422, GNorm = 0.9481, lr_0 = 7.1538e-04
Loss = 4.2212e-01, PNorm = 73.5716, GNorm = 1.1520, lr_0 = 7.1913e-04
Loss = 4.2895e-01, PNorm = 73.6901, GNorm = 1.5739, lr_0 = 7.2288e-04
Loss = 3.7967e-01, PNorm = 73.8115, GNorm = 0.9094, lr_0 = 7.2663e-04
Loss = 4.1964e-01, PNorm = 73.9354, GNorm = 1.3652, lr_0 = 7.3038e-04
Loss = 4.0281e-01, PNorm = 74.0507, GNorm = 1.1955, lr_0 = 7.3413e-04
Loss = 4.0703e-01, PNorm = 74.1821, GNorm = 1.0472, lr_0 = 7.3788e-04
Loss = 4.2830e-01, PNorm = 74.3169, GNorm = 1.3049, lr_0 = 7.4163e-04
Loss = 4.2158e-01, PNorm = 74.4646, GNorm = 1.7851, lr_0 = 7.4538e-04
Loss = 4.3108e-01, PNorm = 74.6067, GNorm = 1.1125, lr_0 = 7.4913e-04
Loss = 3.4983e-01, PNorm = 74.7614, GNorm = 1.6925, lr_0 = 7.5288e-04
Loss = 3.9495e-01, PNorm = 74.9056, GNorm = 1.8470, lr_0 = 7.5663e-04
Loss = 4.2503e-01, PNorm = 75.0518, GNorm = 1.2085, lr_0 = 7.6038e-04
Loss = 3.9971e-01, PNorm = 75.1938, GNorm = 0.9642, lr_0 = 7.6413e-04
Loss = 4.3239e-01, PNorm = 75.3395, GNorm = 1.2735, lr_0 = 7.6788e-04
Loss = 3.6586e-01, PNorm = 75.4843, GNorm = 0.8863, lr_0 = 7.7163e-04
Loss = 4.3989e-01, PNorm = 75.6180, GNorm = 1.6921, lr_0 = 7.7538e-04
Loss = 4.1030e-01, PNorm = 75.7519, GNorm = 1.7658, lr_0 = 7.7913e-04
Loss = 4.2811e-01, PNorm = 75.8944, GNorm = 1.1653, lr_0 = 7.8288e-04
Loss = 4.6104e-01, PNorm = 76.0386, GNorm = 1.2818, lr_0 = 7.8663e-04
Loss = 3.6755e-01, PNorm = 76.1711, GNorm = 1.8237, lr_0 = 7.9038e-04
Loss = 4.4886e-01, PNorm = 76.3096, GNorm = 1.3468, lr_0 = 7.9413e-04
Loss = 4.2839e-01, PNorm = 76.4339, GNorm = 1.4301, lr_0 = 7.9788e-04
Loss = 4.1334e-01, PNorm = 76.5769, GNorm = 1.2097, lr_0 = 8.0163e-04
Loss = 4.1599e-01, PNorm = 76.7124, GNorm = 1.0215, lr_0 = 8.0538e-04
Loss = 4.0131e-01, PNorm = 76.8502, GNorm = 1.3933, lr_0 = 8.0913e-04
Loss = 4.3737e-01, PNorm = 76.9880, GNorm = 1.0643, lr_0 = 8.1288e-04
Loss = 4.2246e-01, PNorm = 77.1407, GNorm = 1.1195, lr_0 = 8.1663e-04
Loss = 4.4044e-01, PNorm = 77.2858, GNorm = 1.5081, lr_0 = 8.2038e-04
Loss = 4.4963e-01, PNorm = 77.4312, GNorm = 0.9030, lr_0 = 8.2413e-04
Loss = 4.7578e-01, PNorm = 77.5808, GNorm = 1.9563, lr_0 = 8.2788e-04
Loss = 3.9133e-01, PNorm = 77.7191, GNorm = 1.2004, lr_0 = 8.3163e-04
Loss = 3.9277e-01, PNorm = 77.8564, GNorm = 0.9232, lr_0 = 8.3538e-04
Loss = 4.3903e-01, PNorm = 77.9844, GNorm = 0.8424, lr_0 = 8.3913e-04
Loss = 4.1981e-01, PNorm = 78.1199, GNorm = 1.3632, lr_0 = 8.4288e-04
Loss = 4.2275e-01, PNorm = 78.2603, GNorm = 1.1576, lr_0 = 8.4663e-04
Loss = 4.2282e-01, PNorm = 78.3931, GNorm = 1.0495, lr_0 = 8.5038e-04
Loss = 4.2767e-01, PNorm = 78.5359, GNorm = 1.5221, lr_0 = 8.5413e-04
Loss = 4.0974e-01, PNorm = 78.6712, GNorm = 1.1457, lr_0 = 8.5788e-04
Loss = 4.5173e-01, PNorm = 78.8215, GNorm = 1.6895, lr_0 = 8.6163e-04
Loss = 4.3749e-01, PNorm = 78.9685, GNorm = 1.2136, lr_0 = 8.6538e-04
Loss = 4.2934e-01, PNorm = 79.1254, GNorm = 1.2978, lr_0 = 8.6913e-04
Loss = 4.5857e-01, PNorm = 79.2730, GNorm = 0.8261, lr_0 = 8.7288e-04
Loss = 4.2550e-01, PNorm = 79.4306, GNorm = 1.2669, lr_0 = 8.7663e-04
Loss = 3.8932e-01, PNorm = 79.5983, GNorm = 1.1000, lr_0 = 8.8038e-04
Loss = 4.4230e-01, PNorm = 79.7668, GNorm = 1.4791, lr_0 = 8.8413e-04
Loss = 4.7344e-01, PNorm = 79.9320, GNorm = 1.2284, lr_0 = 8.8788e-04
Loss = 3.9793e-01, PNorm = 80.0978, GNorm = 0.9763, lr_0 = 8.9163e-04
Loss = 5.2463e-01, PNorm = 80.2602, GNorm = 3.6960, lr_0 = 8.9538e-04
Loss = 5.3771e-01, PNorm = 80.4359, GNorm = 1.1590, lr_0 = 8.9913e-04
Loss = 5.1414e-01, PNorm = 80.6298, GNorm = 1.0756, lr_0 = 9.0288e-04
Loss = 4.7900e-01, PNorm = 80.8361, GNorm = 2.2610, lr_0 = 9.0663e-04
Loss = 4.7395e-01, PNorm = 81.0523, GNorm = 1.4434, lr_0 = 9.1038e-04
Loss = 4.2998e-01, PNorm = 81.2408, GNorm = 1.1657, lr_0 = 9.1413e-04
Loss = 4.3627e-01, PNorm = 81.4230, GNorm = 1.2207, lr_0 = 9.1788e-04
Loss = 4.3603e-01, PNorm = 81.5928, GNorm = 1.1690, lr_0 = 9.2163e-04
Loss = 3.8856e-01, PNorm = 81.7794, GNorm = 0.9192, lr_0 = 9.2538e-04
Loss = 4.6320e-01, PNorm = 81.9680, GNorm = 0.9547, lr_0 = 9.2913e-04
Loss = 4.1703e-01, PNorm = 82.1481, GNorm = 0.9846, lr_0 = 9.3288e-04
Loss = 4.4493e-01, PNorm = 82.3017, GNorm = 1.0788, lr_0 = 9.3663e-04
Loss = 4.6451e-01, PNorm = 82.4785, GNorm = 1.0194, lr_0 = 9.4038e-04
Loss = 3.9965e-01, PNorm = 82.6338, GNorm = 1.3387, lr_0 = 9.4413e-04
Loss = 5.1842e-01, PNorm = 82.7925, GNorm = 1.2183, lr_0 = 9.4788e-04
Loss = 4.1066e-01, PNorm = 82.9320, GNorm = 0.7808, lr_0 = 9.5163e-04
Loss = 4.7773e-01, PNorm = 83.0765, GNorm = 0.9144, lr_0 = 9.5538e-04
Loss = 4.2351e-01, PNorm = 83.2296, GNorm = 1.1970, lr_0 = 9.5913e-04
Loss = 3.5629e-01, PNorm = 83.3714, GNorm = 1.4963, lr_0 = 9.6288e-04
Loss = 4.5253e-01, PNorm = 83.4999, GNorm = 0.7445, lr_0 = 9.6663e-04
Loss = 5.2210e-01, PNorm = 83.6572, GNorm = 2.1356, lr_0 = 9.7038e-04
Loss = 4.3596e-01, PNorm = 83.8030, GNorm = 1.5514, lr_0 = 9.7413e-04
Loss = 4.5151e-01, PNorm = 83.9706, GNorm = 1.2568, lr_0 = 9.7788e-04
Loss = 4.3642e-01, PNorm = 84.1187, GNorm = 0.8969, lr_0 = 9.8163e-04
Loss = 4.2445e-01, PNorm = 84.2777, GNorm = 1.4488, lr_0 = 9.8537e-04
Loss = 4.3133e-01, PNorm = 84.4386, GNorm = 1.5526, lr_0 = 9.8912e-04
Loss = 4.5135e-01, PNorm = 84.5997, GNorm = 1.1123, lr_0 = 9.9288e-04
Loss = 4.4510e-01, PNorm = 84.7632, GNorm = 1.0564, lr_0 = 9.9663e-04
Loss = 3.7847e-01, PNorm = 84.9169, GNorm = 1.3049, lr_0 = 9.9993e-04
Validation mae = 0.128436
Epoch 2
Loss = 2.9005e-01, PNorm = 85.0745, GNorm = 1.2748, lr_0 = 9.9925e-04
Loss = 3.2471e-01, PNorm = 85.2156, GNorm = 1.6453, lr_0 = 9.9856e-04
Loss = 3.0271e-01, PNorm = 85.3609, GNorm = 0.8053, lr_0 = 9.9788e-04
Loss = 2.8185e-01, PNorm = 85.5059, GNorm = 1.0235, lr_0 = 9.9719e-04
Loss = 2.7321e-01, PNorm = 85.6495, GNorm = 0.9539, lr_0 = 9.9651e-04
Loss = 2.7050e-01, PNorm = 85.8065, GNorm = 0.8877, lr_0 = 9.9583e-04
Loss = 3.0213e-01, PNorm = 85.9750, GNorm = 0.9259, lr_0 = 9.9515e-04
Loss = 2.4954e-01, PNorm = 86.1444, GNorm = 0.8539, lr_0 = 9.9446e-04
Loss = 2.9619e-01, PNorm = 86.3157, GNorm = 0.8767, lr_0 = 9.9378e-04
Loss = 2.9078e-01, PNorm = 86.4958, GNorm = 1.0645, lr_0 = 9.9310e-04
Loss = 3.6473e-01, PNorm = 86.7026, GNorm = 1.1490, lr_0 = 9.9242e-04
Loss = 3.5495e-01, PNorm = 86.8955, GNorm = 1.8781, lr_0 = 9.9174e-04
Loss = 2.7704e-01, PNorm = 87.0797, GNorm = 1.3128, lr_0 = 9.9106e-04
Loss = 2.5362e-01, PNorm = 87.2535, GNorm = 0.6865, lr_0 = 9.9038e-04
Loss = 2.7132e-01, PNorm = 87.4177, GNorm = 1.1817, lr_0 = 9.8971e-04
Loss = 3.4874e-01, PNorm = 87.5916, GNorm = 1.7315, lr_0 = 9.8903e-04
Loss = 2.7968e-01, PNorm = 87.7755, GNorm = 0.9294, lr_0 = 9.8835e-04
Loss = 2.8186e-01, PNorm = 87.9369, GNorm = 1.1543, lr_0 = 9.8767e-04
Loss = 2.7121e-01, PNorm = 88.1049, GNorm = 1.1940, lr_0 = 9.8700e-04
Loss = 2.7496e-01, PNorm = 88.2770, GNorm = 0.7231, lr_0 = 9.8632e-04
Loss = 2.5389e-01, PNorm = 88.4429, GNorm = 1.0668, lr_0 = 9.8564e-04
Loss = 2.7658e-01, PNorm = 88.6101, GNorm = 0.9950, lr_0 = 9.8497e-04
Loss = 2.9646e-01, PNorm = 88.7657, GNorm = 1.0092, lr_0 = 9.8429e-04
Loss = 3.0100e-01, PNorm = 88.9393, GNorm = 0.9387, lr_0 = 9.8362e-04
Loss = 3.4916e-01, PNorm = 89.1073, GNorm = 0.7697, lr_0 = 9.8295e-04
Loss = 2.7515e-01, PNorm = 89.2976, GNorm = 0.8633, lr_0 = 9.8227e-04
Loss = 2.9591e-01, PNorm = 89.4653, GNorm = 1.0708, lr_0 = 9.8160e-04
Loss = 2.6661e-01, PNorm = 89.6223, GNorm = 1.0004, lr_0 = 9.8093e-04
Loss = 2.9065e-01, PNorm = 89.7798, GNorm = 0.8294, lr_0 = 9.8026e-04
Loss = 2.6139e-01, PNorm = 89.9406, GNorm = 1.0721, lr_0 = 9.7958e-04
Loss = 3.3285e-01, PNorm = 90.1020, GNorm = 1.2458, lr_0 = 9.7891e-04
Loss = 3.0434e-01, PNorm = 90.2762, GNorm = 1.4031, lr_0 = 9.7824e-04
Loss = 2.8501e-01, PNorm = 90.4468, GNorm = 1.3541, lr_0 = 9.7757e-04
Loss = 2.7900e-01, PNorm = 90.6084, GNorm = 0.8642, lr_0 = 9.7690e-04
Loss = 2.8324e-01, PNorm = 90.7768, GNorm = 1.2503, lr_0 = 9.7623e-04
Loss = 2.9608e-01, PNorm = 90.9340, GNorm = 0.8783, lr_0 = 9.7556e-04
Loss = 2.7559e-01, PNorm = 91.0956, GNorm = 1.0508, lr_0 = 9.7490e-04
Loss = 3.1329e-01, PNorm = 91.2556, GNorm = 1.2336, lr_0 = 9.7423e-04
Loss = 2.9260e-01, PNorm = 91.4313, GNorm = 1.1592, lr_0 = 9.7356e-04
Loss = 3.0536e-01, PNorm = 91.6029, GNorm = 1.4370, lr_0 = 9.7289e-04
Loss = 2.9708e-01, PNorm = 91.7879, GNorm = 1.0870, lr_0 = 9.7223e-04
Loss = 2.6288e-01, PNorm = 91.9544, GNorm = 0.7818, lr_0 = 9.7156e-04
Loss = 2.6678e-01, PNorm = 92.1259, GNorm = 0.7228, lr_0 = 9.7090e-04
Loss = 2.8270e-01, PNorm = 92.2702, GNorm = 0.8294, lr_0 = 9.7023e-04
Loss = 3.1667e-01, PNorm = 92.4358, GNorm = 1.3347, lr_0 = 9.6957e-04
Loss = 3.2231e-01, PNorm = 92.5955, GNorm = 0.8521, lr_0 = 9.6890e-04
Loss = 3.4411e-01, PNorm = 92.7687, GNorm = 0.8389, lr_0 = 9.6824e-04
Loss = 2.6523e-01, PNorm = 92.9287, GNorm = 1.1351, lr_0 = 9.6757e-04
Loss = 3.2717e-01, PNorm = 93.0919, GNorm = 1.1009, lr_0 = 9.6691e-04
Loss = 2.8709e-01, PNorm = 93.2597, GNorm = 1.0479, lr_0 = 9.6625e-04
Loss = 3.2379e-01, PNorm = 93.4135, GNorm = 1.0171, lr_0 = 9.6559e-04
Loss = 3.0544e-01, PNorm = 93.5750, GNorm = 0.8176, lr_0 = 9.6493e-04
Loss = 2.8949e-01, PNorm = 93.7448, GNorm = 1.1388, lr_0 = 9.6427e-04
Loss = 2.8183e-01, PNorm = 93.9085, GNorm = 1.5484, lr_0 = 9.6360e-04
Loss = 3.0835e-01, PNorm = 94.0650, GNorm = 0.9072, lr_0 = 9.6294e-04
Loss = 2.7032e-01, PNorm = 94.2216, GNorm = 1.2751, lr_0 = 9.6228e-04
Loss = 3.1863e-01, PNorm = 94.3814, GNorm = 2.2932, lr_0 = 9.6163e-04
Loss = 2.8954e-01, PNorm = 94.5522, GNorm = 0.9801, lr_0 = 9.6097e-04
Loss = 3.2564e-01, PNorm = 94.7327, GNorm = 1.1464, lr_0 = 9.6031e-04
Loss = 3.3641e-01, PNorm = 94.8936, GNorm = 1.0539, lr_0 = 9.5965e-04
Loss = 3.0756e-01, PNorm = 95.0679, GNorm = 1.1805, lr_0 = 9.5899e-04
Loss = 3.8127e-01, PNorm = 95.2467, GNorm = 1.6391, lr_0 = 9.5834e-04
Loss = 3.3817e-01, PNorm = 95.4287, GNorm = 1.4034, lr_0 = 9.5768e-04
Loss = 2.9797e-01, PNorm = 95.6078, GNorm = 1.1096, lr_0 = 9.5702e-04
Loss = 3.2294e-01, PNorm = 95.7767, GNorm = 0.9634, lr_0 = 9.5637e-04
Loss = 2.7926e-01, PNorm = 95.9582, GNorm = 1.0434, lr_0 = 9.5571e-04
Loss = 3.5139e-01, PNorm = 96.1278, GNorm = 0.8391, lr_0 = 9.5506e-04
Loss = 2.7967e-01, PNorm = 96.3139, GNorm = 0.9105, lr_0 = 9.5440e-04
Loss = 2.7736e-01, PNorm = 96.4808, GNorm = 0.9017, lr_0 = 9.5375e-04
Loss = 2.8337e-01, PNorm = 96.6348, GNorm = 1.5592, lr_0 = 9.5310e-04
Loss = 3.0341e-01, PNorm = 96.8030, GNorm = 1.0695, lr_0 = 9.5244e-04
Loss = 2.9252e-01, PNorm = 96.9619, GNorm = 0.9630, lr_0 = 9.5179e-04
Loss = 3.6705e-01, PNorm = 97.1296, GNorm = 1.6933, lr_0 = 9.5114e-04
Loss = 3.4502e-01, PNorm = 97.2966, GNorm = 0.8939, lr_0 = 9.5049e-04
Loss = 3.1701e-01, PNorm = 97.4559, GNorm = 0.8025, lr_0 = 9.4984e-04
Loss = 2.8326e-01, PNorm = 97.6172, GNorm = 0.5838, lr_0 = 9.4919e-04
Loss = 3.1601e-01, PNorm = 97.7726, GNorm = 1.3003, lr_0 = 9.4854e-04
Loss = 3.8775e-01, PNorm = 97.9298, GNorm = 0.7420, lr_0 = 9.4789e-04
Loss = 3.2358e-01, PNorm = 98.1053, GNorm = 1.1357, lr_0 = 9.4724e-04
Loss = 2.8325e-01, PNorm = 98.2720, GNorm = 1.4072, lr_0 = 9.4659e-04
Loss = 3.5429e-01, PNorm = 98.4268, GNorm = 0.9032, lr_0 = 9.4594e-04
Loss = 3.4306e-01, PNorm = 98.5946, GNorm = 1.0623, lr_0 = 9.4529e-04
Loss = 3.7726e-01, PNorm = 98.7699, GNorm = 1.3288, lr_0 = 9.4464e-04
Loss = 3.0795e-01, PNorm = 98.9351, GNorm = 0.6996, lr_0 = 9.4400e-04
Loss = 3.6184e-01, PNorm = 99.0825, GNorm = 2.5791, lr_0 = 9.4335e-04
Loss = 3.5847e-01, PNorm = 99.2466, GNorm = 0.8798, lr_0 = 9.4270e-04
Loss = 3.2956e-01, PNorm = 99.4149, GNorm = 1.1028, lr_0 = 9.4206e-04
Loss = 3.3288e-01, PNorm = 99.5785, GNorm = 0.9017, lr_0 = 9.4141e-04
Loss = 3.8965e-01, PNorm = 99.7306, GNorm = 1.3550, lr_0 = 9.4077e-04
Loss = 3.4204e-01, PNorm = 99.9041, GNorm = 0.8710, lr_0 = 9.4012e-04
Loss = 3.0762e-01, PNorm = 100.0739, GNorm = 1.1512, lr_0 = 9.3948e-04
Loss = 3.3023e-01, PNorm = 100.2470, GNorm = 1.1814, lr_0 = 9.3884e-04
Loss = 3.2941e-01, PNorm = 100.4114, GNorm = 1.1781, lr_0 = 9.3819e-04
Loss = 3.4536e-01, PNorm = 100.5722, GNorm = 0.7328, lr_0 = 9.3755e-04
Loss = 3.4664e-01, PNorm = 100.7424, GNorm = 1.3547, lr_0 = 9.3691e-04
Loss = 2.9219e-01, PNorm = 100.9004, GNorm = 1.3882, lr_0 = 9.3627e-04
Loss = 3.3462e-01, PNorm = 101.0604, GNorm = 0.7540, lr_0 = 9.3562e-04
Loss = 3.4035e-01, PNorm = 101.2282, GNorm = 0.7404, lr_0 = 9.3498e-04
Loss = 3.4437e-01, PNorm = 101.3756, GNorm = 1.2214, lr_0 = 9.3434e-04
Loss = 3.0991e-01, PNorm = 101.5260, GNorm = 1.1896, lr_0 = 9.3370e-04
Loss = 3.3625e-01, PNorm = 101.6723, GNorm = 1.1806, lr_0 = 9.3306e-04
Loss = 3.0629e-01, PNorm = 101.8213, GNorm = 1.1145, lr_0 = 9.3242e-04
Loss = 3.2372e-01, PNorm = 101.9632, GNorm = 0.9603, lr_0 = 9.3178e-04
Loss = 3.5399e-01, PNorm = 102.1150, GNorm = 0.7529, lr_0 = 9.3115e-04
Loss = 2.9875e-01, PNorm = 102.2705, GNorm = 1.2898, lr_0 = 9.3051e-04
Loss = 3.2888e-01, PNorm = 102.4067, GNorm = 0.8890, lr_0 = 9.2987e-04
Loss = 3.1826e-01, PNorm = 102.5498, GNorm = 1.1737, lr_0 = 9.2923e-04
Loss = 2.8795e-01, PNorm = 102.6874, GNorm = 1.1435, lr_0 = 9.2860e-04
Loss = 3.4109e-01, PNorm = 102.8262, GNorm = 0.7792, lr_0 = 9.2796e-04
Loss = 3.4160e-01, PNorm = 102.9625, GNorm = 1.2082, lr_0 = 9.2733e-04
Loss = 3.7555e-01, PNorm = 103.1021, GNorm = 1.5486, lr_0 = 9.2669e-04
Loss = 2.8485e-01, PNorm = 103.2321, GNorm = 0.9221, lr_0 = 9.2606e-04
Loss = 2.9349e-01, PNorm = 103.3663, GNorm = 1.2866, lr_0 = 9.2542e-04
Loss = 3.6203e-01, PNorm = 103.5040, GNorm = 1.6699, lr_0 = 9.2479e-04
Loss = 3.0465e-01, PNorm = 103.6519, GNorm = 1.9595, lr_0 = 9.2415e-04
Loss = 3.0220e-01, PNorm = 103.7970, GNorm = 1.0324, lr_0 = 9.2352e-04
Loss = 3.5775e-01, PNorm = 103.9586, GNorm = 1.1009, lr_0 = 9.2289e-04
Loss = 3.4075e-01, PNorm = 104.1129, GNorm = 2.2048, lr_0 = 9.2226e-04
Loss = 3.2119e-01, PNorm = 104.2713, GNorm = 1.0268, lr_0 = 9.2162e-04
Loss = 4.3562e-01, PNorm = 104.4371, GNorm = 1.8429, lr_0 = 9.2099e-04
Validation mae = 0.128064
Epoch 3
Loss = 1.9466e-01, PNorm = 104.5818, GNorm = 0.6402, lr_0 = 9.2036e-04
Loss = 2.0475e-01, PNorm = 104.7164, GNorm = 0.7875, lr_0 = 9.1973e-04
Loss = 1.8020e-01, PNorm = 104.8293, GNorm = 0.8244, lr_0 = 9.1910e-04
Loss = 1.9356e-01, PNorm = 104.9377, GNorm = 0.7082, lr_0 = 9.1847e-04
Loss = 1.9397e-01, PNorm = 105.0436, GNorm = 0.8010, lr_0 = 9.1784e-04
Loss = 1.6398e-01, PNorm = 105.1364, GNorm = 0.7921, lr_0 = 9.1721e-04
Loss = 2.1616e-01, PNorm = 105.2354, GNorm = 1.0632, lr_0 = 9.1658e-04
Loss = 2.2225e-01, PNorm = 105.3492, GNorm = 3.2232, lr_0 = 9.1596e-04
Loss = 1.5057e-01, PNorm = 105.4605, GNorm = 0.7123, lr_0 = 9.1533e-04
Loss = 1.7686e-01, PNorm = 105.5702, GNorm = 0.7770, lr_0 = 9.1470e-04
Loss = 2.1436e-01, PNorm = 105.6777, GNorm = 0.7237, lr_0 = 9.1408e-04
Loss = 1.7894e-01, PNorm = 105.7750, GNorm = 0.6202, lr_0 = 9.1345e-04
Loss = 1.8509e-01, PNorm = 105.8818, GNorm = 1.9431, lr_0 = 9.1282e-04
Loss = 1.6187e-01, PNorm = 105.9803, GNorm = 1.0174, lr_0 = 9.1220e-04
Loss = 1.5209e-01, PNorm = 106.0926, GNorm = 0.9190, lr_0 = 9.1157e-04
Loss = 1.7027e-01, PNorm = 106.2053, GNorm = 1.0033, lr_0 = 9.1095e-04
Loss = 1.7756e-01, PNorm = 106.3064, GNorm = 0.6788, lr_0 = 9.1032e-04
Loss = 1.5098e-01, PNorm = 106.4167, GNorm = 0.5622, lr_0 = 9.0970e-04
Loss = 2.1631e-01, PNorm = 106.5242, GNorm = 0.9927, lr_0 = 9.0908e-04
Loss = 1.6984e-01, PNorm = 106.6313, GNorm = 1.2316, lr_0 = 9.0846e-04
Loss = 1.8442e-01, PNorm = 106.7383, GNorm = 1.2293, lr_0 = 9.0783e-04
Loss = 1.9948e-01, PNorm = 106.8679, GNorm = 1.3712, lr_0 = 9.0721e-04
Loss = 1.8673e-01, PNorm = 106.9833, GNorm = 0.9175, lr_0 = 9.0659e-04
Loss = 2.1844e-01, PNorm = 107.1196, GNorm = 1.4361, lr_0 = 9.0597e-04
Loss = 1.9118e-01, PNorm = 107.2318, GNorm = 0.9883, lr_0 = 9.0535e-04
Loss = 1.7829e-01, PNorm = 107.3594, GNorm = 1.1594, lr_0 = 9.0473e-04
Loss = 1.5602e-01, PNorm = 107.4680, GNorm = 1.7960, lr_0 = 9.0411e-04
Loss = 1.7877e-01, PNorm = 107.5754, GNorm = 0.5832, lr_0 = 9.0349e-04
Loss = 1.6012e-01, PNorm = 107.6897, GNorm = 0.7104, lr_0 = 9.0287e-04
Loss = 2.1124e-01, PNorm = 107.7950, GNorm = 1.0348, lr_0 = 9.0225e-04
Loss = 2.1967e-01, PNorm = 107.8986, GNorm = 1.3697, lr_0 = 9.0163e-04
Loss = 2.1834e-01, PNorm = 108.0373, GNorm = 1.1849, lr_0 = 9.0102e-04
Loss = 1.8554e-01, PNorm = 108.1522, GNorm = 0.8872, lr_0 = 9.0040e-04
Loss = 1.9273e-01, PNorm = 108.2730, GNorm = 0.7550, lr_0 = 8.9978e-04
Loss = 1.8780e-01, PNorm = 108.3781, GNorm = 0.9582, lr_0 = 8.9916e-04
Loss = 1.8184e-01, PNorm = 108.4836, GNorm = 0.9809, lr_0 = 8.9855e-04
Loss = 1.8303e-01, PNorm = 108.5998, GNorm = 0.8398, lr_0 = 8.9793e-04
Loss = 1.6513e-01, PNorm = 108.7034, GNorm = 0.8931, lr_0 = 8.9732e-04
Loss = 1.9451e-01, PNorm = 108.8236, GNorm = 1.1577, lr_0 = 8.9670e-04
Loss = 1.8649e-01, PNorm = 108.9350, GNorm = 1.4967, lr_0 = 8.9609e-04
Loss = 1.8427e-01, PNorm = 109.0394, GNorm = 0.8848, lr_0 = 8.9548e-04
Loss = 1.9054e-01, PNorm = 109.1477, GNorm = 1.0490, lr_0 = 8.9486e-04
Loss = 1.9112e-01, PNorm = 109.2545, GNorm = 0.6868, lr_0 = 8.9425e-04
Loss = 2.1098e-01, PNorm = 109.3679, GNorm = 0.8172, lr_0 = 8.9364e-04
Loss = 1.8538e-01, PNorm = 109.4754, GNorm = 0.8462, lr_0 = 8.9302e-04
Loss = 1.9466e-01, PNorm = 109.5982, GNorm = 1.0468, lr_0 = 8.9241e-04
Loss = 2.0614e-01, PNorm = 109.7027, GNorm = 0.8703, lr_0 = 8.9180e-04
Loss = 2.5740e-01, PNorm = 109.8423, GNorm = 1.1712, lr_0 = 8.9119e-04
Loss = 2.0967e-01, PNorm = 109.9531, GNorm = 0.9580, lr_0 = 8.9058e-04
Loss = 2.1801e-01, PNorm = 110.0889, GNorm = 0.9340, lr_0 = 8.8997e-04
Loss = 2.1386e-01, PNorm = 110.2064, GNorm = 0.8032, lr_0 = 8.8936e-04
Loss = 1.9082e-01, PNorm = 110.3368, GNorm = 0.6788, lr_0 = 8.8875e-04
Loss = 1.9157e-01, PNorm = 110.4569, GNorm = 1.3127, lr_0 = 8.8814e-04
Loss = 2.0402e-01, PNorm = 110.5762, GNorm = 1.7311, lr_0 = 8.8753e-04
Loss = 2.2202e-01, PNorm = 110.6806, GNorm = 0.9453, lr_0 = 8.8693e-04
Loss = 2.0802e-01, PNorm = 110.8017, GNorm = 1.3662, lr_0 = 8.8632e-04
Loss = 2.2344e-01, PNorm = 110.9218, GNorm = 1.4868, lr_0 = 8.8571e-04
Loss = 2.0987e-01, PNorm = 111.0458, GNorm = 1.1427, lr_0 = 8.8510e-04
Loss = 1.9047e-01, PNorm = 111.1565, GNorm = 0.8709, lr_0 = 8.8450e-04
Loss = 1.8886e-01, PNorm = 111.2824, GNorm = 1.1751, lr_0 = 8.8389e-04
Loss = 2.0447e-01, PNorm = 111.4088, GNorm = 1.0089, lr_0 = 8.8329e-04
Loss = 2.1263e-01, PNorm = 111.5394, GNorm = 0.6899, lr_0 = 8.8268e-04
Loss = 2.1647e-01, PNorm = 111.6679, GNorm = 0.9962, lr_0 = 8.8208e-04
Loss = 2.5892e-01, PNorm = 111.7867, GNorm = 0.9727, lr_0 = 8.8147e-04
Loss = 2.1245e-01, PNorm = 111.9107, GNorm = 0.6607, lr_0 = 8.8087e-04
Loss = 2.0550e-01, PNorm = 112.0477, GNorm = 1.0230, lr_0 = 8.8026e-04
Loss = 2.1877e-01, PNorm = 112.1572, GNorm = 1.0424, lr_0 = 8.7966e-04
Loss = 2.2084e-01, PNorm = 112.2912, GNorm = 1.3317, lr_0 = 8.7906e-04
Loss = 1.8848e-01, PNorm = 112.4290, GNorm = 0.8246, lr_0 = 8.7846e-04
Loss = 1.6102e-01, PNorm = 112.5557, GNorm = 0.6123, lr_0 = 8.7785e-04
Loss = 2.3479e-01, PNorm = 112.6674, GNorm = 0.7749, lr_0 = 8.7725e-04
Loss = 2.0863e-01, PNorm = 112.8026, GNorm = 1.0272, lr_0 = 8.7665e-04
Loss = 2.3536e-01, PNorm = 112.9376, GNorm = 0.6367, lr_0 = 8.7605e-04
Loss = 1.8305e-01, PNorm = 113.0657, GNorm = 0.8328, lr_0 = 8.7545e-04
Loss = 2.0999e-01, PNorm = 113.1938, GNorm = 0.7466, lr_0 = 8.7485e-04
Loss = 2.1054e-01, PNorm = 113.3203, GNorm = 0.7512, lr_0 = 8.7425e-04
Loss = 2.1077e-01, PNorm = 113.4414, GNorm = 1.4168, lr_0 = 8.7365e-04
Loss = 2.1488e-01, PNorm = 113.5699, GNorm = 1.2234, lr_0 = 8.7306e-04
Loss = 2.1730e-01, PNorm = 113.6934, GNorm = 0.8079, lr_0 = 8.7246e-04
Loss = 1.9428e-01, PNorm = 113.8276, GNorm = 0.7460, lr_0 = 8.7186e-04
Loss = 2.4331e-01, PNorm = 113.9508, GNorm = 0.9175, lr_0 = 8.7126e-04
Loss = 2.1440e-01, PNorm = 114.0954, GNorm = 1.1555, lr_0 = 8.7067e-04
Loss = 2.1288e-01, PNorm = 114.2265, GNorm = 1.2781, lr_0 = 8.7007e-04
Loss = 2.2430e-01, PNorm = 114.3641, GNorm = 1.0075, lr_0 = 8.6947e-04
Loss = 2.1673e-01, PNorm = 114.5049, GNorm = 0.8810, lr_0 = 8.6888e-04
Loss = 2.1872e-01, PNorm = 114.6319, GNorm = 0.8337, lr_0 = 8.6828e-04
Loss = 2.2508e-01, PNorm = 114.7648, GNorm = 0.9152, lr_0 = 8.6769e-04
Loss = 2.0660e-01, PNorm = 114.8765, GNorm = 1.1379, lr_0 = 8.6709e-04
Loss = 1.7874e-01, PNorm = 114.9923, GNorm = 0.8498, lr_0 = 8.6650e-04
Loss = 1.8953e-01, PNorm = 115.1041, GNorm = 0.9728, lr_0 = 8.6590e-04
Loss = 1.9114e-01, PNorm = 115.2298, GNorm = 1.1434, lr_0 = 8.6531e-04
Loss = 2.0183e-01, PNorm = 115.3376, GNorm = 0.7439, lr_0 = 8.6472e-04
Loss = 1.8999e-01, PNorm = 115.4504, GNorm = 0.8321, lr_0 = 8.6413e-04
Loss = 2.2042e-01, PNorm = 115.5542, GNorm = 1.3291, lr_0 = 8.6353e-04
Loss = 2.2101e-01, PNorm = 115.6704, GNorm = 0.9822, lr_0 = 8.6294e-04
Loss = 2.2185e-01, PNorm = 115.7904, GNorm = 1.0484, lr_0 = 8.6235e-04
Loss = 2.3520e-01, PNorm = 115.9182, GNorm = 0.8136, lr_0 = 8.6176e-04
Loss = 2.4734e-01, PNorm = 116.0459, GNorm = 1.0288, lr_0 = 8.6117e-04
Loss = 2.4185e-01, PNorm = 116.1772, GNorm = 0.7403, lr_0 = 8.6058e-04
Loss = 2.2146e-01, PNorm = 116.3109, GNorm = 0.7498, lr_0 = 8.5999e-04
Loss = 1.9596e-01, PNorm = 116.4443, GNorm = 0.8966, lr_0 = 8.5940e-04
Loss = 2.0112e-01, PNorm = 116.5786, GNorm = 0.9162, lr_0 = 8.5881e-04
Loss = 1.7344e-01, PNorm = 116.6971, GNorm = 1.0636, lr_0 = 8.5823e-04
Loss = 1.9186e-01, PNorm = 116.8139, GNorm = 1.0054, lr_0 = 8.5764e-04
Loss = 2.2747e-01, PNorm = 116.9327, GNorm = 0.8714, lr_0 = 8.5705e-04
Loss = 2.1622e-01, PNorm = 117.0551, GNorm = 1.1343, lr_0 = 8.5646e-04
Loss = 1.9503e-01, PNorm = 117.1886, GNorm = 1.3061, lr_0 = 8.5588e-04
Loss = 2.1749e-01, PNorm = 117.3243, GNorm = 0.8069, lr_0 = 8.5529e-04
Loss = 2.1200e-01, PNorm = 117.4583, GNorm = 1.0883, lr_0 = 8.5470e-04
Loss = 2.0368e-01, PNorm = 117.5883, GNorm = 0.8731, lr_0 = 8.5412e-04
Loss = 2.2810e-01, PNorm = 117.7126, GNorm = 1.1335, lr_0 = 8.5353e-04
Loss = 2.3489e-01, PNorm = 117.8396, GNorm = 0.8380, lr_0 = 8.5295e-04
Loss = 2.2963e-01, PNorm = 117.9637, GNorm = 0.8964, lr_0 = 8.5236e-04
Loss = 1.9085e-01, PNorm = 118.0875, GNorm = 0.8805, lr_0 = 8.5178e-04
Loss = 2.1607e-01, PNorm = 118.1961, GNorm = 1.1394, lr_0 = 8.5120e-04
Loss = 2.3027e-01, PNorm = 118.3294, GNorm = 1.1889, lr_0 = 8.5061e-04
Loss = 2.1313e-01, PNorm = 118.4495, GNorm = 0.8743, lr_0 = 8.5003e-04
Loss = 2.7287e-01, PNorm = 118.5821, GNorm = 1.0099, lr_0 = 8.4945e-04
Loss = 2.2333e-01, PNorm = 118.7140, GNorm = 1.2151, lr_0 = 8.4887e-04
Loss = 2.3958e-01, PNorm = 118.8476, GNorm = 1.0672, lr_0 = 8.4828e-04
Validation mae = 0.126260
Epoch 4
Loss = 1.3306e-01, PNorm = 118.9694, GNorm = 0.7928, lr_0 = 8.4770e-04
Loss = 1.3122e-01, PNorm = 119.0735, GNorm = 1.1221, lr_0 = 8.4712e-04
Loss = 1.4539e-01, PNorm = 119.1682, GNorm = 0.9046, lr_0 = 8.4654e-04
Loss = 1.3110e-01, PNorm = 119.2516, GNorm = 0.6825, lr_0 = 8.4596e-04
Loss = 1.3342e-01, PNorm = 119.3414, GNorm = 0.5360, lr_0 = 8.4538e-04
Loss = 1.2832e-01, PNorm = 119.4216, GNorm = 0.7893, lr_0 = 8.4480e-04
Loss = 1.1788e-01, PNorm = 119.4973, GNorm = 0.7616, lr_0 = 8.4423e-04
Loss = 1.1740e-01, PNorm = 119.5732, GNorm = 0.6672, lr_0 = 8.4365e-04
Loss = 1.1474e-01, PNorm = 119.6503, GNorm = 0.5573, lr_0 = 8.4307e-04
Loss = 1.2199e-01, PNorm = 119.7261, GNorm = 0.7118, lr_0 = 8.4249e-04
Loss = 1.2214e-01, PNorm = 119.8099, GNorm = 1.1533, lr_0 = 8.4191e-04
Loss = 1.2701e-01, PNorm = 119.8846, GNorm = 0.4967, lr_0 = 8.4134e-04
Loss = 1.0474e-01, PNorm = 119.9683, GNorm = 0.5981, lr_0 = 8.4076e-04
Loss = 1.2753e-01, PNorm = 120.0278, GNorm = 0.6871, lr_0 = 8.4019e-04
Loss = 1.3107e-01, PNorm = 120.1079, GNorm = 1.2510, lr_0 = 8.3961e-04
Loss = 1.0012e-01, PNorm = 120.1845, GNorm = 0.7213, lr_0 = 8.3903e-04
Loss = 1.2001e-01, PNorm = 120.2564, GNorm = 0.6756, lr_0 = 8.3846e-04
Loss = 1.0922e-01, PNorm = 120.3339, GNorm = 0.6039, lr_0 = 8.3789e-04
Loss = 1.0772e-01, PNorm = 120.4005, GNorm = 0.9468, lr_0 = 8.3731e-04
Loss = 1.0557e-01, PNorm = 120.4781, GNorm = 0.9851, lr_0 = 8.3674e-04
Loss = 1.1545e-01, PNorm = 120.5473, GNorm = 1.2524, lr_0 = 8.3616e-04
Loss = 1.1323e-01, PNorm = 120.6122, GNorm = 0.6420, lr_0 = 8.3559e-04
Loss = 1.0783e-01, PNorm = 120.6917, GNorm = 0.6720, lr_0 = 8.3502e-04
Loss = 1.2469e-01, PNorm = 120.7660, GNorm = 0.6934, lr_0 = 8.3445e-04
Loss = 1.1373e-01, PNorm = 120.8457, GNorm = 1.1287, lr_0 = 8.3388e-04
Loss = 9.6432e-02, PNorm = 120.9238, GNorm = 0.7884, lr_0 = 8.3330e-04
Loss = 1.1571e-01, PNorm = 120.9922, GNorm = 0.7720, lr_0 = 8.3273e-04
Loss = 1.3397e-01, PNorm = 121.0701, GNorm = 0.5539, lr_0 = 8.3216e-04
Loss = 1.1511e-01, PNorm = 121.1451, GNorm = 0.5792, lr_0 = 8.3159e-04
Loss = 1.1055e-01, PNorm = 121.2260, GNorm = 0.5824, lr_0 = 8.3102e-04
Loss = 1.4262e-01, PNorm = 121.2975, GNorm = 1.1300, lr_0 = 8.3045e-04
Loss = 1.4909e-01, PNorm = 121.3818, GNorm = 0.6675, lr_0 = 8.2988e-04
Loss = 1.1552e-01, PNorm = 121.4629, GNorm = 0.6078, lr_0 = 8.2932e-04
Loss = 1.3384e-01, PNorm = 121.5402, GNorm = 0.7121, lr_0 = 8.2875e-04
Loss = 1.1623e-01, PNorm = 121.6193, GNorm = 0.5452, lr_0 = 8.2818e-04
Loss = 1.4118e-01, PNorm = 121.6971, GNorm = 0.7208, lr_0 = 8.2761e-04
Loss = 1.1885e-01, PNorm = 121.7799, GNorm = 0.6479, lr_0 = 8.2705e-04
Loss = 1.2239e-01, PNorm = 121.8539, GNorm = 0.6262, lr_0 = 8.2648e-04
Loss = 1.5715e-01, PNorm = 121.9433, GNorm = 0.6427, lr_0 = 8.2591e-04
Loss = 1.1759e-01, PNorm = 122.0422, GNorm = 0.7149, lr_0 = 8.2535e-04
Loss = 1.3820e-01, PNorm = 122.1310, GNorm = 0.9204, lr_0 = 8.2478e-04
Loss = 1.2687e-01, PNorm = 122.2241, GNorm = 0.8404, lr_0 = 8.2422e-04
Loss = 1.3020e-01, PNorm = 122.3130, GNorm = 0.5087, lr_0 = 8.2365e-04
Loss = 1.3772e-01, PNorm = 122.4117, GNorm = 0.5141, lr_0 = 8.2309e-04
Loss = 1.1766e-01, PNorm = 122.5124, GNorm = 0.5457, lr_0 = 8.2252e-04
Loss = 1.1614e-01, PNorm = 122.6077, GNorm = 0.6855, lr_0 = 8.2196e-04
Loss = 1.0617e-01, PNorm = 122.6986, GNorm = 0.6461, lr_0 = 8.2140e-04
Loss = 1.2038e-01, PNorm = 122.7857, GNorm = 0.7267, lr_0 = 8.2084e-04
Loss = 1.1236e-01, PNorm = 122.8676, GNorm = 0.7131, lr_0 = 8.2027e-04
Loss = 1.1666e-01, PNorm = 122.9527, GNorm = 0.7322, lr_0 = 8.1971e-04
Loss = 1.4124e-01, PNorm = 123.0416, GNorm = 0.5557, lr_0 = 8.1915e-04
Loss = 1.3465e-01, PNorm = 123.1211, GNorm = 1.0499, lr_0 = 8.1859e-04
Loss = 1.2420e-01, PNorm = 123.2126, GNorm = 0.8027, lr_0 = 8.1803e-04
Loss = 1.0720e-01, PNorm = 123.3057, GNorm = 0.6676, lr_0 = 8.1747e-04
Loss = 1.2256e-01, PNorm = 123.3941, GNorm = 0.8061, lr_0 = 8.1691e-04
Loss = 1.2834e-01, PNorm = 123.4863, GNorm = 0.4785, lr_0 = 8.1635e-04
Loss = 1.2264e-01, PNorm = 123.5839, GNorm = 1.2709, lr_0 = 8.1579e-04
Loss = 1.3658e-01, PNorm = 123.6690, GNorm = 0.6237, lr_0 = 8.1523e-04
Loss = 1.2774e-01, PNorm = 123.7538, GNorm = 0.6621, lr_0 = 8.1467e-04
Loss = 1.0015e-01, PNorm = 123.8409, GNorm = 0.4125, lr_0 = 8.1411e-04
Loss = 1.2117e-01, PNorm = 123.9205, GNorm = 0.8518, lr_0 = 8.1355e-04
Loss = 1.2186e-01, PNorm = 124.0045, GNorm = 0.7448, lr_0 = 8.1300e-04
Loss = 1.1905e-01, PNorm = 124.0902, GNorm = 0.6047, lr_0 = 8.1244e-04
Loss = 1.3238e-01, PNorm = 124.1709, GNorm = 0.7435, lr_0 = 8.1188e-04
Loss = 1.1502e-01, PNorm = 124.2639, GNorm = 0.8956, lr_0 = 8.1133e-04
Loss = 1.2820e-01, PNorm = 124.3544, GNorm = 0.6849, lr_0 = 8.1077e-04
Loss = 1.4368e-01, PNorm = 124.4513, GNorm = 0.7155, lr_0 = 8.1022e-04
Loss = 1.2896e-01, PNorm = 124.5527, GNorm = 0.7038, lr_0 = 8.0966e-04
Loss = 1.3960e-01, PNorm = 124.6464, GNorm = 0.9309, lr_0 = 8.0911e-04
Loss = 1.2159e-01, PNorm = 124.7472, GNorm = 1.2098, lr_0 = 8.0855e-04
Loss = 1.3740e-01, PNorm = 124.8441, GNorm = 0.7259, lr_0 = 8.0800e-04
Loss = 1.3951e-01, PNorm = 124.9344, GNorm = 0.6016, lr_0 = 8.0745e-04
Loss = 1.3080e-01, PNorm = 125.0211, GNorm = 0.7006, lr_0 = 8.0689e-04
Loss = 1.3685e-01, PNorm = 125.1162, GNorm = 0.7735, lr_0 = 8.0634e-04
Loss = 1.2920e-01, PNorm = 125.2123, GNorm = 0.6951, lr_0 = 8.0579e-04
Loss = 1.4248e-01, PNorm = 125.3118, GNorm = 1.5301, lr_0 = 8.0523e-04
Loss = 1.3022e-01, PNorm = 125.4215, GNorm = 1.1085, lr_0 = 8.0468e-04
Loss = 1.5032e-01, PNorm = 125.5275, GNorm = 0.5660, lr_0 = 8.0413e-04
Loss = 1.3950e-01, PNorm = 125.6357, GNorm = 1.0054, lr_0 = 8.0358e-04
Loss = 1.4200e-01, PNorm = 125.7453, GNorm = 0.9076, lr_0 = 8.0303e-04
Loss = 1.3265e-01, PNorm = 125.8440, GNorm = 0.7310, lr_0 = 8.0248e-04
Loss = 1.3276e-01, PNorm = 125.9495, GNorm = 0.5998, lr_0 = 8.0193e-04
Loss = 1.6298e-01, PNorm = 126.0526, GNorm = 0.7825, lr_0 = 8.0138e-04
Loss = 1.3120e-01, PNorm = 126.1544, GNorm = 0.5781, lr_0 = 8.0083e-04
Loss = 1.4929e-01, PNorm = 126.2655, GNorm = 0.6436, lr_0 = 8.0028e-04
Loss = 1.3781e-01, PNorm = 126.3709, GNorm = 1.0642, lr_0 = 7.9974e-04
Loss = 1.1985e-01, PNorm = 126.4753, GNorm = 0.6488, lr_0 = 7.9919e-04
Loss = 1.2536e-01, PNorm = 126.5738, GNorm = 0.8898, lr_0 = 7.9864e-04
Loss = 1.3264e-01, PNorm = 126.6848, GNorm = 0.6463, lr_0 = 7.9809e-04
Loss = 1.3841e-01, PNorm = 126.7925, GNorm = 0.7616, lr_0 = 7.9755e-04
Loss = 1.1966e-01, PNorm = 126.8913, GNorm = 0.5824, lr_0 = 7.9700e-04
Loss = 1.4485e-01, PNorm = 126.9919, GNorm = 0.5643, lr_0 = 7.9645e-04
Loss = 1.1956e-01, PNorm = 127.0905, GNorm = 0.8287, lr_0 = 7.9591e-04
Loss = 1.5578e-01, PNorm = 127.1813, GNorm = 1.1326, lr_0 = 7.9536e-04
Loss = 1.0984e-01, PNorm = 127.2859, GNorm = 0.7056, lr_0 = 7.9482e-04
Loss = 1.6957e-01, PNorm = 127.3726, GNorm = 0.8101, lr_0 = 7.9427e-04
Loss = 1.6209e-01, PNorm = 127.4718, GNorm = 0.6550, lr_0 = 7.9373e-04
Loss = 1.2763e-01, PNorm = 127.5714, GNorm = 0.7843, lr_0 = 7.9319e-04
Loss = 1.4855e-01, PNorm = 127.6655, GNorm = 0.6480, lr_0 = 7.9264e-04
Loss = 1.4333e-01, PNorm = 127.7696, GNorm = 0.8405, lr_0 = 7.9210e-04
Loss = 1.4821e-01, PNorm = 127.8713, GNorm = 0.8477, lr_0 = 7.9156e-04
Loss = 1.3936e-01, PNorm = 127.9761, GNorm = 0.5433, lr_0 = 7.9101e-04
Loss = 1.3084e-01, PNorm = 128.0780, GNorm = 0.5974, lr_0 = 7.9047e-04
Loss = 1.1862e-01, PNorm = 128.1766, GNorm = 0.7258, lr_0 = 7.8993e-04
Loss = 1.4443e-01, PNorm = 128.2720, GNorm = 0.8074, lr_0 = 7.8939e-04
Loss = 1.5894e-01, PNorm = 128.3687, GNorm = 0.7355, lr_0 = 7.8885e-04
Loss = 1.3465e-01, PNorm = 128.4770, GNorm = 0.5883, lr_0 = 7.8831e-04
Loss = 1.5397e-01, PNorm = 128.5799, GNorm = 1.4675, lr_0 = 7.8777e-04
Loss = 1.3522e-01, PNorm = 128.6852, GNorm = 0.9838, lr_0 = 7.8723e-04
Loss = 1.5265e-01, PNorm = 128.7963, GNorm = 0.9557, lr_0 = 7.8669e-04
Loss = 1.3905e-01, PNorm = 128.8940, GNorm = 0.9471, lr_0 = 7.8615e-04
Loss = 1.3615e-01, PNorm = 128.9995, GNorm = 0.8586, lr_0 = 7.8561e-04
Loss = 1.3778e-01, PNorm = 129.0971, GNorm = 0.7590, lr_0 = 7.8507e-04
Loss = 1.5947e-01, PNorm = 129.2024, GNorm = 0.7611, lr_0 = 7.8454e-04
Loss = 1.4383e-01, PNorm = 129.3057, GNorm = 0.6735, lr_0 = 7.8400e-04
Loss = 1.2802e-01, PNorm = 129.4115, GNorm = 0.5379, lr_0 = 7.8346e-04
Loss = 1.3486e-01, PNorm = 129.5103, GNorm = 0.6526, lr_0 = 7.8293e-04
Loss = 1.3889e-01, PNorm = 129.6203, GNorm = 0.4181, lr_0 = 7.8239e-04
Loss = 1.6818e-01, PNorm = 129.7378, GNorm = 0.9259, lr_0 = 7.8185e-04
Loss = 1.4549e-01, PNorm = 129.8469, GNorm = 0.5250, lr_0 = 7.8132e-04
Validation mae = 0.126409
Epoch 5
Loss = 8.3795e-02, PNorm = 129.9366, GNorm = 0.5962, lr_0 = 7.8078e-04
Loss = 9.8535e-02, PNorm = 130.0036, GNorm = 0.7996, lr_0 = 7.8025e-04
Loss = 8.8520e-02, PNorm = 130.0759, GNorm = 1.4113, lr_0 = 7.7971e-04
Loss = 9.8906e-02, PNorm = 130.1467, GNorm = 0.8492, lr_0 = 7.7918e-04
Loss = 9.8670e-02, PNorm = 130.2175, GNorm = 0.7373, lr_0 = 7.7864e-04
Loss = 8.7501e-02, PNorm = 130.2829, GNorm = 0.6837, lr_0 = 7.7811e-04
Loss = 7.3646e-02, PNorm = 130.3501, GNorm = 0.5932, lr_0 = 7.7758e-04
Loss = 9.1772e-02, PNorm = 130.4062, GNorm = 0.6902, lr_0 = 7.7705e-04
Loss = 7.1011e-02, PNorm = 130.4627, GNorm = 0.8061, lr_0 = 7.7651e-04
Loss = 8.6490e-02, PNorm = 130.5196, GNorm = 0.4445, lr_0 = 7.7598e-04
Loss = 6.6548e-02, PNorm = 130.5764, GNorm = 0.5776, lr_0 = 7.7545e-04
Loss = 7.3387e-02, PNorm = 130.6332, GNorm = 0.6073, lr_0 = 7.7492e-04
Loss = 7.4838e-02, PNorm = 130.6927, GNorm = 0.6312, lr_0 = 7.7439e-04
Loss = 8.7607e-02, PNorm = 130.7545, GNorm = 0.4174, lr_0 = 7.7386e-04
Loss = 6.6106e-02, PNorm = 130.8147, GNorm = 0.7089, lr_0 = 7.7333e-04
Loss = 7.5612e-02, PNorm = 130.8725, GNorm = 0.6782, lr_0 = 7.7280e-04
Loss = 9.4987e-02, PNorm = 130.9327, GNorm = 0.4853, lr_0 = 7.7227e-04
Loss = 8.9107e-02, PNorm = 131.0041, GNorm = 0.7685, lr_0 = 7.7174e-04
Loss = 8.8061e-02, PNorm = 131.0711, GNorm = 0.7554, lr_0 = 7.7121e-04
Loss = 8.1157e-02, PNorm = 131.1417, GNorm = 0.3490, lr_0 = 7.7068e-04
Loss = 8.7387e-02, PNorm = 131.2030, GNorm = 0.4478, lr_0 = 7.7015e-04
Loss = 7.4562e-02, PNorm = 131.2661, GNorm = 0.4859, lr_0 = 7.6963e-04
Loss = 8.4951e-02, PNorm = 131.3205, GNorm = 0.4966, lr_0 = 7.6910e-04
Loss = 7.1906e-02, PNorm = 131.3832, GNorm = 0.9969, lr_0 = 7.6857e-04
Loss = 9.4520e-02, PNorm = 131.4373, GNorm = 1.4712, lr_0 = 7.6805e-04
Loss = 8.1327e-02, PNorm = 131.4980, GNorm = 0.5361, lr_0 = 7.6752e-04
Loss = 6.9842e-02, PNorm = 131.5641, GNorm = 0.6511, lr_0 = 7.6699e-04
Loss = 8.5865e-02, PNorm = 131.6252, GNorm = 0.6277, lr_0 = 7.6647e-04
Loss = 8.9900e-02, PNorm = 131.6995, GNorm = 0.6767, lr_0 = 7.6594e-04
Loss = 9.9143e-02, PNorm = 131.7740, GNorm = 1.0058, lr_0 = 7.6542e-04
Loss = 7.3064e-02, PNorm = 131.8512, GNorm = 0.6117, lr_0 = 7.6489e-04
Loss = 8.9928e-02, PNorm = 131.9244, GNorm = 0.8083, lr_0 = 7.6437e-04
Loss = 9.3696e-02, PNorm = 131.9909, GNorm = 0.4937, lr_0 = 7.6385e-04
Loss = 7.4289e-02, PNorm = 132.0607, GNorm = 0.6079, lr_0 = 7.6332e-04
Loss = 8.1596e-02, PNorm = 132.1262, GNorm = 0.5877, lr_0 = 7.6280e-04
Loss = 8.6066e-02, PNorm = 132.1970, GNorm = 0.5685, lr_0 = 7.6228e-04
Loss = 8.6206e-02, PNorm = 132.2611, GNorm = 0.7010, lr_0 = 7.6176e-04
Loss = 7.6741e-02, PNorm = 132.3240, GNorm = 0.7038, lr_0 = 7.6123e-04
Loss = 8.8474e-02, PNorm = 132.3914, GNorm = 0.7321, lr_0 = 7.6071e-04
Loss = 8.7930e-02, PNorm = 132.4559, GNorm = 0.7253, lr_0 = 7.6019e-04
Loss = 7.7228e-02, PNorm = 132.5280, GNorm = 0.3727, lr_0 = 7.5967e-04
Loss = 1.0418e-01, PNorm = 132.6023, GNorm = 0.8867, lr_0 = 7.5915e-04
Loss = 9.1950e-02, PNorm = 132.6782, GNorm = 0.6513, lr_0 = 7.5863e-04
Loss = 8.9298e-02, PNorm = 132.7499, GNorm = 0.6824, lr_0 = 7.5811e-04
Loss = 7.4924e-02, PNorm = 132.8236, GNorm = 0.5298, lr_0 = 7.5759e-04
Loss = 8.1393e-02, PNorm = 132.8920, GNorm = 0.5205, lr_0 = 7.5707e-04
Loss = 7.7717e-02, PNorm = 132.9591, GNorm = 0.4469, lr_0 = 7.5655e-04
Loss = 8.7535e-02, PNorm = 133.0215, GNorm = 0.8384, lr_0 = 7.5603e-04
Loss = 7.8855e-02, PNorm = 133.0911, GNorm = 0.6456, lr_0 = 7.5552e-04
Loss = 7.4120e-02, PNorm = 133.1657, GNorm = 0.9673, lr_0 = 7.5500e-04
Loss = 8.1361e-02, PNorm = 133.2339, GNorm = 0.9683, lr_0 = 7.5448e-04
Loss = 7.9041e-02, PNorm = 133.3059, GNorm = 0.8680, lr_0 = 7.5397e-04
Loss = 7.3138e-02, PNorm = 133.3672, GNorm = 0.5936, lr_0 = 7.5345e-04
Loss = 7.7915e-02, PNorm = 133.4338, GNorm = 1.2346, lr_0 = 7.5293e-04
Loss = 8.4121e-02, PNorm = 133.5030, GNorm = 0.4936, lr_0 = 7.5242e-04
Loss = 7.8138e-02, PNorm = 133.5736, GNorm = 0.5365, lr_0 = 7.5190e-04
Loss = 8.2488e-02, PNorm = 133.6350, GNorm = 0.8395, lr_0 = 7.5139e-04
Loss = 8.4520e-02, PNorm = 133.6981, GNorm = 0.5104, lr_0 = 7.5087e-04
Loss = 9.4156e-02, PNorm = 133.7654, GNorm = 0.5174, lr_0 = 7.5036e-04
Loss = 7.1959e-02, PNorm = 133.8297, GNorm = 0.7486, lr_0 = 7.4984e-04
Loss = 7.5519e-02, PNorm = 133.8997, GNorm = 0.5170, lr_0 = 7.4933e-04
Loss = 7.8776e-02, PNorm = 133.9736, GNorm = 0.8777, lr_0 = 7.4882e-04
Loss = 9.4599e-02, PNorm = 134.0459, GNorm = 0.6850, lr_0 = 7.4830e-04
Loss = 8.5453e-02, PNorm = 134.1192, GNorm = 0.6800, lr_0 = 7.4779e-04
Loss = 1.0119e-01, PNorm = 134.1963, GNorm = 1.0109, lr_0 = 7.4728e-04
Loss = 9.2688e-02, PNorm = 134.2826, GNorm = 0.7446, lr_0 = 7.4677e-04
Loss = 1.0015e-01, PNorm = 134.3584, GNorm = 0.5905, lr_0 = 7.4625e-04
Loss = 1.0463e-01, PNorm = 134.4504, GNorm = 0.9383, lr_0 = 7.4574e-04
Loss = 8.1949e-02, PNorm = 134.5274, GNorm = 0.8194, lr_0 = 7.4523e-04
Loss = 8.6637e-02, PNorm = 134.6050, GNorm = 0.4550, lr_0 = 7.4472e-04
Loss = 8.7196e-02, PNorm = 134.6874, GNorm = 0.5918, lr_0 = 7.4421e-04
Loss = 8.5772e-02, PNorm = 134.7612, GNorm = 1.6548, lr_0 = 7.4370e-04
Loss = 8.8892e-02, PNorm = 134.8355, GNorm = 0.5849, lr_0 = 7.4319e-04
Loss = 8.7865e-02, PNorm = 134.9150, GNorm = 0.9102, lr_0 = 7.4268e-04
Loss = 8.7238e-02, PNorm = 134.9946, GNorm = 0.6905, lr_0 = 7.4217e-04
Loss = 9.5199e-02, PNorm = 135.0822, GNorm = 0.7454, lr_0 = 7.4167e-04
Loss = 1.0293e-01, PNorm = 135.1739, GNorm = 0.8719, lr_0 = 7.4116e-04
Loss = 8.5759e-02, PNorm = 135.2689, GNorm = 0.6257, lr_0 = 7.4065e-04
Loss = 1.2457e-01, PNorm = 135.3516, GNorm = 0.5759, lr_0 = 7.4014e-04
Loss = 8.5937e-02, PNorm = 135.4363, GNorm = 0.9446, lr_0 = 7.3964e-04
Loss = 1.1560e-01, PNorm = 135.5193, GNorm = 0.6959, lr_0 = 7.3913e-04
Loss = 9.5878e-02, PNorm = 135.6050, GNorm = 0.6080, lr_0 = 7.3862e-04
Loss = 8.6453e-02, PNorm = 135.6925, GNorm = 0.5165, lr_0 = 7.3812e-04
Loss = 9.2039e-02, PNorm = 135.7777, GNorm = 0.5625, lr_0 = 7.3761e-04
Loss = 8.8740e-02, PNorm = 135.8648, GNorm = 0.5951, lr_0 = 7.3711e-04
Loss = 8.3677e-02, PNorm = 135.9502, GNorm = 0.6180, lr_0 = 7.3660e-04
Loss = 1.0134e-01, PNorm = 136.0324, GNorm = 0.8407, lr_0 = 7.3610e-04
Loss = 8.7940e-02, PNorm = 136.1163, GNorm = 0.5108, lr_0 = 7.3559e-04
Loss = 1.0774e-01, PNorm = 136.1969, GNorm = 0.7402, lr_0 = 7.3509e-04
Loss = 1.0395e-01, PNorm = 136.2874, GNorm = 0.7032, lr_0 = 7.3458e-04
Loss = 1.0352e-01, PNorm = 136.3738, GNorm = 0.7126, lr_0 = 7.3408e-04
Loss = 1.1588e-01, PNorm = 136.4688, GNorm = 0.5733, lr_0 = 7.3358e-04
Loss = 9.4395e-02, PNorm = 136.5639, GNorm = 0.5878, lr_0 = 7.3308e-04
Loss = 9.0036e-02, PNorm = 136.6604, GNorm = 0.6722, lr_0 = 7.3257e-04
Loss = 7.6232e-02, PNorm = 136.7460, GNorm = 1.1111, lr_0 = 7.3207e-04
Loss = 9.0640e-02, PNorm = 136.8151, GNorm = 0.4860, lr_0 = 7.3157e-04
Loss = 9.8701e-02, PNorm = 136.8930, GNorm = 0.8136, lr_0 = 7.3107e-04
Loss = 9.8694e-02, PNorm = 136.9768, GNorm = 0.4343, lr_0 = 7.3057e-04
Loss = 8.0408e-02, PNorm = 137.0573, GNorm = 0.4701, lr_0 = 7.3007e-04
Loss = 9.0949e-02, PNorm = 137.1412, GNorm = 0.5639, lr_0 = 7.2957e-04
Loss = 1.0160e-01, PNorm = 137.2182, GNorm = 1.3640, lr_0 = 7.2907e-04
Loss = 1.0932e-01, PNorm = 137.2975, GNorm = 0.5454, lr_0 = 7.2857e-04
Loss = 1.0791e-01, PNorm = 137.4012, GNorm = 0.8404, lr_0 = 7.2807e-04
Loss = 1.2045e-01, PNorm = 137.4940, GNorm = 0.5142, lr_0 = 7.2757e-04
Loss = 1.1404e-01, PNorm = 137.5854, GNorm = 1.4523, lr_0 = 7.2707e-04
Loss = 1.0190e-01, PNorm = 137.6813, GNorm = 1.0652, lr_0 = 7.2657e-04
Loss = 9.5589e-02, PNorm = 137.7735, GNorm = 0.6206, lr_0 = 7.2608e-04
Loss = 8.8360e-02, PNorm = 137.8661, GNorm = 1.1463, lr_0 = 7.2558e-04
Loss = 1.1348e-01, PNorm = 137.9647, GNorm = 0.7809, lr_0 = 7.2508e-04
Loss = 1.0986e-01, PNorm = 138.0507, GNorm = 0.6769, lr_0 = 7.2458e-04
Loss = 1.2888e-01, PNorm = 138.1461, GNorm = 1.3020, lr_0 = 7.2409e-04
Loss = 1.5181e-01, PNorm = 138.2409, GNorm = 0.4909, lr_0 = 7.2359e-04
Loss = 1.0605e-01, PNorm = 138.3438, GNorm = 0.4834, lr_0 = 7.2310e-04
Loss = 1.1287e-01, PNorm = 138.4438, GNorm = 0.5807, lr_0 = 7.2260e-04
Loss = 9.5147e-02, PNorm = 138.5443, GNorm = 1.4098, lr_0 = 7.2211e-04
Loss = 9.6010e-02, PNorm = 138.6321, GNorm = 1.0073, lr_0 = 7.2161e-04
Loss = 1.0459e-01, PNorm = 138.7094, GNorm = 1.1741, lr_0 = 7.2112e-04
Loss = 1.0907e-01, PNorm = 138.7981, GNorm = 1.4150, lr_0 = 7.2062e-04
Loss = 1.0543e-01, PNorm = 138.8931, GNorm = 0.5451, lr_0 = 7.2013e-04
Loss = 1.0491e-01, PNorm = 138.9822, GNorm = 1.4886, lr_0 = 7.1964e-04
Validation mae = 0.124185
Epoch 6
Loss = 6.3387e-02, PNorm = 139.0601, GNorm = 0.7113, lr_0 = 7.1914e-04
Loss = 6.4044e-02, PNorm = 139.1360, GNorm = 0.5289, lr_0 = 7.1865e-04
Loss = 6.2072e-02, PNorm = 139.1949, GNorm = 0.6766, lr_0 = 7.1816e-04
Loss = 6.5399e-02, PNorm = 139.2501, GNorm = 0.5251, lr_0 = 7.1767e-04
Loss = 7.5898e-02, PNorm = 139.3061, GNorm = 0.6063, lr_0 = 7.1717e-04
Loss = 6.0624e-02, PNorm = 139.3603, GNorm = 0.4612, lr_0 = 7.1668e-04
Loss = 6.0183e-02, PNorm = 139.4103, GNorm = 0.8756, lr_0 = 7.1619e-04
Loss = 6.1233e-02, PNorm = 139.4587, GNorm = 0.4948, lr_0 = 7.1570e-04
Loss = 7.0272e-02, PNorm = 139.5102, GNorm = 0.5748, lr_0 = 7.1521e-04
Loss = 4.8566e-02, PNorm = 139.5594, GNorm = 0.3676, lr_0 = 7.1472e-04
Loss = 5.4889e-02, PNorm = 139.6059, GNorm = 0.3812, lr_0 = 7.1423e-04
Loss = 6.4040e-02, PNorm = 139.6519, GNorm = 0.6203, lr_0 = 7.1374e-04
Loss = 6.9258e-02, PNorm = 139.7048, GNorm = 0.6241, lr_0 = 7.1325e-04
Loss = 6.6101e-02, PNorm = 139.7561, GNorm = 0.6614, lr_0 = 7.1277e-04
Loss = 5.6965e-02, PNorm = 139.8060, GNorm = 0.6601, lr_0 = 7.1228e-04
Loss = 6.4936e-02, PNorm = 139.8626, GNorm = 0.5914, lr_0 = 7.1179e-04
Loss = 6.0446e-02, PNorm = 139.9108, GNorm = 0.8559, lr_0 = 7.1130e-04
Loss = 5.6065e-02, PNorm = 139.9669, GNorm = 0.4709, lr_0 = 7.1081e-04
Loss = 6.3883e-02, PNorm = 140.0179, GNorm = 0.4971, lr_0 = 7.1033e-04
Loss = 6.8807e-02, PNorm = 140.0755, GNorm = 0.6863, lr_0 = 7.0984e-04
Loss = 6.6216e-02, PNorm = 140.1261, GNorm = 0.5142, lr_0 = 7.0935e-04
Loss = 5.6078e-02, PNorm = 140.1757, GNorm = 0.5115, lr_0 = 7.0887e-04
Loss = 6.5232e-02, PNorm = 140.2335, GNorm = 0.5443, lr_0 = 7.0838e-04
Loss = 5.5225e-02, PNorm = 140.2836, GNorm = 0.5011, lr_0 = 7.0790e-04
Loss = 5.8247e-02, PNorm = 140.3348, GNorm = 0.6182, lr_0 = 7.0741e-04
Loss = 6.5486e-02, PNorm = 140.3915, GNorm = 0.5645, lr_0 = 7.0693e-04
Loss = 6.1426e-02, PNorm = 140.4495, GNorm = 0.2791, lr_0 = 7.0644e-04
Loss = 6.1880e-02, PNorm = 140.4962, GNorm = 0.5670, lr_0 = 7.0596e-04
Loss = 5.1267e-02, PNorm = 140.5453, GNorm = 0.4049, lr_0 = 7.0548e-04
Loss = 5.6240e-02, PNorm = 140.5909, GNorm = 0.5713, lr_0 = 7.0499e-04
Loss = 7.1618e-02, PNorm = 140.6481, GNorm = 0.6870, lr_0 = 7.0451e-04
Loss = 5.8010e-02, PNorm = 140.7023, GNorm = 0.5242, lr_0 = 7.0403e-04
Loss = 5.8122e-02, PNorm = 140.7604, GNorm = 0.7872, lr_0 = 7.0354e-04
Loss = 5.5761e-02, PNorm = 140.8172, GNorm = 0.5083, lr_0 = 7.0306e-04
Loss = 7.4820e-02, PNorm = 140.8708, GNorm = 0.4126, lr_0 = 7.0258e-04
Loss = 5.6986e-02, PNorm = 140.9275, GNorm = 0.4144, lr_0 = 7.0210e-04
Loss = 5.8308e-02, PNorm = 140.9848, GNorm = 0.5477, lr_0 = 7.0162e-04
Loss = 5.6911e-02, PNorm = 141.0452, GNorm = 0.9227, lr_0 = 7.0114e-04
Loss = 6.8007e-02, PNorm = 141.0931, GNorm = 0.4100, lr_0 = 7.0066e-04
Loss = 5.8231e-02, PNorm = 141.1495, GNorm = 0.8499, lr_0 = 7.0018e-04
Loss = 5.5474e-02, PNorm = 141.2087, GNorm = 0.7141, lr_0 = 6.9970e-04
Loss = 4.9658e-02, PNorm = 141.2602, GNorm = 0.3717, lr_0 = 6.9922e-04
Loss = 5.5862e-02, PNorm = 141.3169, GNorm = 0.7337, lr_0 = 6.9874e-04
Loss = 6.6526e-02, PNorm = 141.3692, GNorm = 0.6467, lr_0 = 6.9826e-04
Loss = 5.5075e-02, PNorm = 141.4223, GNorm = 1.0744, lr_0 = 6.9778e-04
Loss = 6.5592e-02, PNorm = 141.4825, GNorm = 0.4300, lr_0 = 6.9730e-04
Loss = 5.6523e-02, PNorm = 141.5348, GNorm = 0.4748, lr_0 = 6.9683e-04
Loss = 6.5270e-02, PNorm = 141.5844, GNorm = 0.5900, lr_0 = 6.9635e-04
Loss = 5.5606e-02, PNorm = 141.6390, GNorm = 0.4118, lr_0 = 6.9587e-04
Loss = 5.7967e-02, PNorm = 141.6968, GNorm = 0.5368, lr_0 = 6.9540e-04
Loss = 6.4485e-02, PNorm = 141.7481, GNorm = 0.6107, lr_0 = 6.9492e-04
Loss = 6.8734e-02, PNorm = 141.8018, GNorm = 0.4336, lr_0 = 6.9444e-04
Loss = 6.3175e-02, PNorm = 141.8593, GNorm = 0.4630, lr_0 = 6.9397e-04
Loss = 6.2525e-02, PNorm = 141.9195, GNorm = 0.6614, lr_0 = 6.9349e-04
Loss = 6.1017e-02, PNorm = 141.9762, GNorm = 0.4473, lr_0 = 6.9302e-04
Loss = 5.3334e-02, PNorm = 142.0363, GNorm = 0.3561, lr_0 = 6.9254e-04
Loss = 7.0390e-02, PNorm = 142.0975, GNorm = 0.8483, lr_0 = 6.9207e-04
Loss = 5.9563e-02, PNorm = 142.1567, GNorm = 1.9158, lr_0 = 6.9159e-04
Loss = 6.1658e-02, PNorm = 142.2107, GNorm = 0.4922, lr_0 = 6.9112e-04
Loss = 5.7520e-02, PNorm = 142.2689, GNorm = 0.7171, lr_0 = 6.9065e-04
Loss = 7.7143e-02, PNorm = 142.3249, GNorm = 0.8718, lr_0 = 6.9017e-04
Loss = 5.6226e-02, PNorm = 142.3864, GNorm = 0.3502, lr_0 = 6.8970e-04
Loss = 5.8838e-02, PNorm = 142.4496, GNorm = 0.5945, lr_0 = 6.8923e-04
Loss = 6.4958e-02, PNorm = 142.5012, GNorm = 0.4706, lr_0 = 6.8876e-04
Loss = 5.7747e-02, PNorm = 142.5579, GNorm = 0.6111, lr_0 = 6.8828e-04
Loss = 6.2280e-02, PNorm = 142.6133, GNorm = 0.9172, lr_0 = 6.8781e-04
Loss = 7.7592e-02, PNorm = 142.6731, GNorm = 0.5934, lr_0 = 6.8734e-04
Loss = 6.3158e-02, PNorm = 142.7395, GNorm = 0.4161, lr_0 = 6.8687e-04
Loss = 5.9315e-02, PNorm = 142.7985, GNorm = 0.5798, lr_0 = 6.8640e-04
Loss = 7.0725e-02, PNorm = 142.8568, GNorm = 0.8750, lr_0 = 6.8593e-04
Loss = 5.7520e-02, PNorm = 142.9182, GNorm = 0.3277, lr_0 = 6.8546e-04
Loss = 6.7604e-02, PNorm = 142.9732, GNorm = 0.5992, lr_0 = 6.8499e-04
Loss = 6.0952e-02, PNorm = 143.0301, GNorm = 0.5605, lr_0 = 6.8452e-04
Loss = 6.2080e-02, PNorm = 143.0871, GNorm = 0.6973, lr_0 = 6.8405e-04
Loss = 6.6313e-02, PNorm = 143.1445, GNorm = 0.6101, lr_0 = 6.8358e-04
Loss = 7.2645e-02, PNorm = 143.2070, GNorm = 0.7626, lr_0 = 6.8312e-04
Loss = 6.7336e-02, PNorm = 143.2746, GNorm = 0.8708, lr_0 = 6.8265e-04
Loss = 6.3956e-02, PNorm = 143.3343, GNorm = 0.8926, lr_0 = 6.8218e-04
Loss = 6.4354e-02, PNorm = 143.4084, GNorm = 0.6393, lr_0 = 6.8171e-04
Loss = 6.9065e-02, PNorm = 143.4705, GNorm = 0.5273, lr_0 = 6.8125e-04
Loss = 6.4716e-02, PNorm = 143.5314, GNorm = 0.5427, lr_0 = 6.8078e-04
Loss = 6.3443e-02, PNorm = 143.6036, GNorm = 0.4385, lr_0 = 6.8031e-04
Loss = 6.3090e-02, PNorm = 143.6735, GNorm = 1.1727, lr_0 = 6.7985e-04
Loss = 8.1610e-02, PNorm = 143.7374, GNorm = 1.0633, lr_0 = 6.7938e-04
Loss = 6.1995e-02, PNorm = 143.8149, GNorm = 0.4569, lr_0 = 6.7892e-04
Loss = 6.1360e-02, PNorm = 143.8748, GNorm = 0.6136, lr_0 = 6.7845e-04
Loss = 6.0285e-02, PNorm = 143.9405, GNorm = 0.4202, lr_0 = 6.7799e-04
Loss = 6.4523e-02, PNorm = 144.0041, GNorm = 0.5405, lr_0 = 6.7752e-04
Loss = 7.6126e-02, PNorm = 144.0704, GNorm = 0.9271, lr_0 = 6.7706e-04
Loss = 7.0618e-02, PNorm = 144.1403, GNorm = 0.7904, lr_0 = 6.7659e-04
Loss = 8.9001e-02, PNorm = 144.2156, GNorm = 0.7841, lr_0 = 6.7613e-04
Loss = 7.0639e-02, PNorm = 144.2886, GNorm = 0.5769, lr_0 = 6.7567e-04
Loss = 7.0532e-02, PNorm = 144.3601, GNorm = 0.5367, lr_0 = 6.7520e-04
Loss = 7.6300e-02, PNorm = 144.4400, GNorm = 0.6963, lr_0 = 6.7474e-04
Loss = 6.8392e-02, PNorm = 144.5184, GNorm = 1.0177, lr_0 = 6.7428e-04
Loss = 6.4828e-02, PNorm = 144.5918, GNorm = 1.0567, lr_0 = 6.7382e-04
Loss = 6.2845e-02, PNorm = 144.6631, GNorm = 0.4206, lr_0 = 6.7335e-04
Loss = 7.1938e-02, PNorm = 144.7277, GNorm = 0.3988, lr_0 = 6.7289e-04
Loss = 9.9849e-02, PNorm = 144.8017, GNorm = 1.7546, lr_0 = 6.7243e-04
Loss = 7.4404e-02, PNorm = 144.8780, GNorm = 0.4290, lr_0 = 6.7197e-04
Loss = 6.4475e-02, PNorm = 144.9570, GNorm = 0.5092, lr_0 = 6.7151e-04
Loss = 7.6168e-02, PNorm = 145.0296, GNorm = 0.5261, lr_0 = 6.7105e-04
Loss = 7.2766e-02, PNorm = 145.1025, GNorm = 0.8459, lr_0 = 6.7059e-04
Loss = 8.0110e-02, PNorm = 145.1759, GNorm = 0.3388, lr_0 = 6.7013e-04
Loss = 7.3602e-02, PNorm = 145.2578, GNorm = 0.4599, lr_0 = 6.6967e-04
Loss = 7.0440e-02, PNorm = 145.3312, GNorm = 0.4935, lr_0 = 6.6921e-04
Loss = 6.6615e-02, PNorm = 145.4068, GNorm = 0.8006, lr_0 = 6.6876e-04
Loss = 7.6882e-02, PNorm = 145.4814, GNorm = 0.4480, lr_0 = 6.6830e-04
Loss = 8.4003e-02, PNorm = 145.5592, GNorm = 0.6330, lr_0 = 6.6784e-04
Loss = 8.7721e-02, PNorm = 145.6341, GNorm = 0.5854, lr_0 = 6.6738e-04
Loss = 9.9448e-02, PNorm = 145.7104, GNorm = 0.7494, lr_0 = 6.6693e-04
Loss = 7.1286e-02, PNorm = 145.7843, GNorm = 0.7142, lr_0 = 6.6647e-04
Loss = 7.3800e-02, PNorm = 145.8539, GNorm = 0.6551, lr_0 = 6.6601e-04
Loss = 6.8356e-02, PNorm = 145.9261, GNorm = 0.3257, lr_0 = 6.6556e-04
Loss = 7.5552e-02, PNorm = 145.9990, GNorm = 0.5141, lr_0 = 6.6510e-04
Loss = 6.1844e-02, PNorm = 146.0724, GNorm = 0.5522, lr_0 = 6.6464e-04
Loss = 6.6055e-02, PNorm = 146.1429, GNorm = 0.4888, lr_0 = 6.6419e-04
Loss = 9.0944e-02, PNorm = 146.2164, GNorm = 0.9122, lr_0 = 6.6373e-04
Loss = 5.8599e-02, PNorm = 146.2911, GNorm = 0.4408, lr_0 = 6.6328e-04
Loss = 6.8231e-02, PNorm = 146.3602, GNorm = 0.5441, lr_0 = 6.6282e-04
Validation mae = 0.124381
Epoch 7
Loss = 5.3291e-02, PNorm = 146.4241, GNorm = 0.4637, lr_0 = 6.6237e-04
Loss = 5.7416e-02, PNorm = 146.4842, GNorm = 1.0230, lr_0 = 6.6192e-04
Loss = 5.4332e-02, PNorm = 146.5414, GNorm = 0.4647, lr_0 = 6.6146e-04
Loss = 5.2264e-02, PNorm = 146.5867, GNorm = 0.5290, lr_0 = 6.6101e-04
Loss = 5.2324e-02, PNorm = 146.6248, GNorm = 0.9218, lr_0 = 6.6056e-04
Loss = 4.9535e-02, PNorm = 146.6718, GNorm = 0.6018, lr_0 = 6.6011e-04
Loss = 6.2251e-02, PNorm = 146.7195, GNorm = 0.4159, lr_0 = 6.5965e-04
Loss = 5.7961e-02, PNorm = 146.7698, GNorm = 0.4483, lr_0 = 6.5920e-04
Loss = 6.6551e-02, PNorm = 146.8262, GNorm = 0.4282, lr_0 = 6.5875e-04
Loss = 5.5688e-02, PNorm = 146.8788, GNorm = 0.9538, lr_0 = 6.5830e-04
Loss = 4.7729e-02, PNorm = 146.9246, GNorm = 0.4156, lr_0 = 6.5785e-04
Loss = 4.8418e-02, PNorm = 146.9722, GNorm = 0.5370, lr_0 = 6.5740e-04
Loss = 4.9365e-02, PNorm = 147.0245, GNorm = 0.7556, lr_0 = 6.5695e-04
Loss = 6.6704e-02, PNorm = 147.0668, GNorm = 0.5432, lr_0 = 6.5650e-04
Loss = 4.5572e-02, PNorm = 147.1181, GNorm = 0.4705, lr_0 = 6.5605e-04
Loss = 5.1288e-02, PNorm = 147.1645, GNorm = 0.5368, lr_0 = 6.5560e-04
Loss = 4.3636e-02, PNorm = 147.2116, GNorm = 0.6746, lr_0 = 6.5515e-04
Loss = 5.4351e-02, PNorm = 147.2557, GNorm = 0.4596, lr_0 = 6.5470e-04
Loss = 5.0085e-02, PNorm = 147.3005, GNorm = 0.4083, lr_0 = 6.5425e-04
Loss = 3.9815e-02, PNorm = 147.3506, GNorm = 0.6258, lr_0 = 6.5380e-04
Loss = 4.4939e-02, PNorm = 147.3953, GNorm = 0.4551, lr_0 = 6.5335e-04
Loss = 4.5665e-02, PNorm = 147.4406, GNorm = 0.3561, lr_0 = 6.5291e-04
Loss = 4.5538e-02, PNorm = 147.4861, GNorm = 0.5761, lr_0 = 6.5246e-04
Loss = 5.3336e-02, PNorm = 147.5316, GNorm = 0.5150, lr_0 = 6.5201e-04
Loss = 5.3144e-02, PNorm = 147.5803, GNorm = 0.4260, lr_0 = 6.5157e-04
Loss = 4.2350e-02, PNorm = 147.6278, GNorm = 0.2971, lr_0 = 6.5112e-04
Loss = 4.9918e-02, PNorm = 147.6799, GNorm = 0.4409, lr_0 = 6.5067e-04
Loss = 4.5498e-02, PNorm = 147.7233, GNorm = 0.4139, lr_0 = 6.5023e-04
Loss = 4.9177e-02, PNorm = 147.7663, GNorm = 0.3350, lr_0 = 6.4978e-04
Loss = 4.9934e-02, PNorm = 147.8201, GNorm = 0.5226, lr_0 = 6.4934e-04
Loss = 5.0526e-02, PNorm = 147.8699, GNorm = 0.3238, lr_0 = 6.4889e-04
Loss = 4.8986e-02, PNorm = 147.9178, GNorm = 0.3727, lr_0 = 6.4845e-04
Loss = 4.4276e-02, PNorm = 147.9655, GNorm = 0.3181, lr_0 = 6.4800e-04
Loss = 4.6490e-02, PNorm = 148.0101, GNorm = 0.3161, lr_0 = 6.4756e-04
Loss = 5.5908e-02, PNorm = 148.0592, GNorm = 0.6681, lr_0 = 6.4712e-04
Loss = 5.3701e-02, PNorm = 148.1041, GNorm = 0.3727, lr_0 = 6.4667e-04
Loss = 5.3158e-02, PNorm = 148.1584, GNorm = 0.6455, lr_0 = 6.4623e-04
Loss = 5.1603e-02, PNorm = 148.2193, GNorm = 0.3535, lr_0 = 6.4579e-04
Loss = 5.0943e-02, PNorm = 148.2633, GNorm = 0.3521, lr_0 = 6.4534e-04
Loss = 5.1005e-02, PNorm = 148.3113, GNorm = 0.5379, lr_0 = 6.4490e-04
Loss = 3.9095e-02, PNorm = 148.3577, GNorm = 0.3859, lr_0 = 6.4446e-04
Loss = 5.2720e-02, PNorm = 148.4130, GNorm = 0.4488, lr_0 = 6.4402e-04
Loss = 5.3668e-02, PNorm = 148.4665, GNorm = 0.5782, lr_0 = 6.4358e-04
Loss = 6.1158e-02, PNorm = 148.5223, GNorm = 0.4443, lr_0 = 6.4314e-04
Loss = 5.1160e-02, PNorm = 148.5786, GNorm = 0.5397, lr_0 = 6.4270e-04
Loss = 5.5266e-02, PNorm = 148.6373, GNorm = 0.9907, lr_0 = 6.4226e-04
Loss = 5.7117e-02, PNorm = 148.6889, GNorm = 0.7663, lr_0 = 6.4182e-04
Loss = 4.5816e-02, PNorm = 148.7351, GNorm = 0.5383, lr_0 = 6.4138e-04
Loss = 4.6738e-02, PNorm = 148.7835, GNorm = 0.5916, lr_0 = 6.4094e-04
Loss = 4.2361e-02, PNorm = 148.8331, GNorm = 0.6224, lr_0 = 6.4050e-04
Loss = 5.0869e-02, PNorm = 148.8814, GNorm = 0.4209, lr_0 = 6.4006e-04
Loss = 5.0234e-02, PNorm = 148.9359, GNorm = 0.6374, lr_0 = 6.3962e-04
Loss = 5.0130e-02, PNorm = 148.9795, GNorm = 0.4612, lr_0 = 6.3918e-04
Loss = 4.6967e-02, PNorm = 149.0327, GNorm = 0.3999, lr_0 = 6.3874e-04
Loss = 4.8174e-02, PNorm = 149.0847, GNorm = 0.4242, lr_0 = 6.3831e-04
Loss = 5.0125e-02, PNorm = 149.1376, GNorm = 0.5290, lr_0 = 6.3787e-04
Loss = 4.1624e-02, PNorm = 149.1942, GNorm = 0.3596, lr_0 = 6.3743e-04
Loss = 6.0721e-02, PNorm = 149.2463, GNorm = 0.7579, lr_0 = 6.3700e-04
Loss = 4.7925e-02, PNorm = 149.2961, GNorm = 0.3883, lr_0 = 6.3656e-04
Loss = 4.1955e-02, PNorm = 149.3478, GNorm = 0.7194, lr_0 = 6.3612e-04
Loss = 4.5045e-02, PNorm = 149.3976, GNorm = 0.6057, lr_0 = 6.3569e-04
Loss = 4.8056e-02, PNorm = 149.4499, GNorm = 0.6584, lr_0 = 6.3525e-04
Loss = 5.2091e-02, PNorm = 149.5050, GNorm = 0.8033, lr_0 = 6.3482e-04
Loss = 4.9930e-02, PNorm = 149.5533, GNorm = 0.3964, lr_0 = 6.3438e-04
Loss = 5.0257e-02, PNorm = 149.6038, GNorm = 0.7633, lr_0 = 6.3395e-04
Loss = 5.6337e-02, PNorm = 149.6551, GNorm = 0.6004, lr_0 = 6.3351e-04
Loss = 4.3025e-02, PNorm = 149.7047, GNorm = 0.6792, lr_0 = 6.3308e-04
Loss = 4.5870e-02, PNorm = 149.7600, GNorm = 0.8058, lr_0 = 6.3265e-04
Loss = 5.3625e-02, PNorm = 149.8091, GNorm = 0.5345, lr_0 = 6.3221e-04
Loss = 5.3149e-02, PNorm = 149.8603, GNorm = 0.6828, lr_0 = 6.3178e-04
Loss = 5.8544e-02, PNorm = 149.9107, GNorm = 0.6603, lr_0 = 6.3135e-04
Loss = 5.6964e-02, PNorm = 149.9657, GNorm = 0.8625, lr_0 = 6.3091e-04
Loss = 4.5744e-02, PNorm = 150.0215, GNorm = 0.4768, lr_0 = 6.3048e-04
Loss = 6.8844e-02, PNorm = 150.0794, GNorm = 1.0928, lr_0 = 6.3005e-04
Loss = 5.8225e-02, PNorm = 150.1371, GNorm = 0.4562, lr_0 = 6.2962e-04
Loss = 4.4331e-02, PNorm = 150.1964, GNorm = 0.7482, lr_0 = 6.2919e-04
Loss = 5.4716e-02, PNorm = 150.2467, GNorm = 0.4734, lr_0 = 6.2876e-04
Loss = 5.8168e-02, PNorm = 150.3027, GNorm = 0.6220, lr_0 = 6.2833e-04
Loss = 5.7558e-02, PNorm = 150.3578, GNorm = 0.3666, lr_0 = 6.2789e-04
Loss = 4.5706e-02, PNorm = 150.4128, GNorm = 0.4768, lr_0 = 6.2746e-04
Loss = 4.9915e-02, PNorm = 150.4689, GNorm = 0.4924, lr_0 = 6.2703e-04
Loss = 5.0012e-02, PNorm = 150.5237, GNorm = 0.4009, lr_0 = 6.2661e-04
Loss = 5.0986e-02, PNorm = 150.5844, GNorm = 0.3831, lr_0 = 6.2618e-04
Loss = 4.4754e-02, PNorm = 150.6416, GNorm = 0.4741, lr_0 = 6.2575e-04
Loss = 5.7997e-02, PNorm = 150.6995, GNorm = 0.5900, lr_0 = 6.2532e-04
Loss = 4.7370e-02, PNorm = 150.7573, GNorm = 1.6578, lr_0 = 6.2489e-04
Loss = 5.6647e-02, PNorm = 150.8142, GNorm = 0.5994, lr_0 = 6.2446e-04
Loss = 5.6667e-02, PNorm = 150.8697, GNorm = 0.3339, lr_0 = 6.2403e-04
Loss = 3.6329e-02, PNorm = 150.9273, GNorm = 0.3116, lr_0 = 6.2361e-04
Loss = 4.3629e-02, PNorm = 150.9806, GNorm = 0.9638, lr_0 = 6.2318e-04
Loss = 4.1851e-02, PNorm = 151.0315, GNorm = 0.4001, lr_0 = 6.2275e-04
Loss = 5.0792e-02, PNorm = 151.0877, GNorm = 0.6183, lr_0 = 6.2233e-04
Loss = 5.3311e-02, PNorm = 151.1375, GNorm = 0.6348, lr_0 = 6.2190e-04
Loss = 5.6310e-02, PNorm = 151.1957, GNorm = 0.7725, lr_0 = 6.2147e-04
Loss = 5.5946e-02, PNorm = 151.2544, GNorm = 0.6152, lr_0 = 6.2105e-04
Loss = 4.8233e-02, PNorm = 151.3147, GNorm = 0.5262, lr_0 = 6.2062e-04
Loss = 4.0877e-02, PNorm = 151.3747, GNorm = 0.3424, lr_0 = 6.2020e-04
Loss = 4.7113e-02, PNorm = 151.4286, GNorm = 0.3836, lr_0 = 6.1977e-04
Loss = 5.2220e-02, PNorm = 151.4841, GNorm = 0.6451, lr_0 = 6.1935e-04
Loss = 5.2056e-02, PNorm = 151.5429, GNorm = 0.3390, lr_0 = 6.1892e-04
Loss = 5.0570e-02, PNorm = 151.6020, GNorm = 0.4673, lr_0 = 6.1850e-04
Loss = 4.2451e-02, PNorm = 151.6552, GNorm = 0.3748, lr_0 = 6.1808e-04
Loss = 5.0941e-02, PNorm = 151.7109, GNorm = 0.6874, lr_0 = 6.1765e-04
Loss = 4.5673e-02, PNorm = 151.7697, GNorm = 0.5256, lr_0 = 6.1723e-04
Loss = 5.1497e-02, PNorm = 151.8268, GNorm = 1.0872, lr_0 = 6.1681e-04
Loss = 4.5511e-02, PNorm = 151.8828, GNorm = 0.5353, lr_0 = 6.1638e-04
Loss = 4.8938e-02, PNorm = 151.9355, GNorm = 0.3596, lr_0 = 6.1596e-04
Loss = 5.6800e-02, PNorm = 151.9944, GNorm = 1.1892, lr_0 = 6.1554e-04
Loss = 6.4345e-02, PNorm = 152.0607, GNorm = 0.6000, lr_0 = 6.1512e-04
Loss = 5.7927e-02, PNorm = 152.1182, GNorm = 0.5398, lr_0 = 6.1470e-04
Loss = 5.5967e-02, PNorm = 152.1860, GNorm = 0.4813, lr_0 = 6.1428e-04
Loss = 6.2551e-02, PNorm = 152.2542, GNorm = 0.3943, lr_0 = 6.1385e-04
Loss = 5.4623e-02, PNorm = 152.3167, GNorm = 0.6289, lr_0 = 6.1343e-04
Loss = 8.0210e-02, PNorm = 152.3865, GNorm = 0.5282, lr_0 = 6.1301e-04
Loss = 5.5923e-02, PNorm = 152.4499, GNorm = 0.6745, lr_0 = 6.1259e-04
Loss = 5.1670e-02, PNorm = 152.5148, GNorm = 0.7183, lr_0 = 6.1217e-04
Loss = 6.4867e-02, PNorm = 152.5797, GNorm = 0.6538, lr_0 = 6.1175e-04
Loss = 5.4001e-02, PNorm = 152.6458, GNorm = 1.1150, lr_0 = 6.1134e-04
Loss = 4.7327e-02, PNorm = 152.7031, GNorm = 0.4177, lr_0 = 6.1092e-04
Loss = 4.7894e-02, PNorm = 152.7616, GNorm = 0.6102, lr_0 = 6.1050e-04
Validation mae = 0.123227
Epoch 8
Loss = 5.1117e-02, PNorm = 152.8153, GNorm = 0.6282, lr_0 = 6.1008e-04
Loss = 5.0182e-02, PNorm = 152.8644, GNorm = 0.4413, lr_0 = 6.0966e-04
Loss = 4.3464e-02, PNorm = 152.9076, GNorm = 0.5340, lr_0 = 6.0924e-04
Loss = 4.4915e-02, PNorm = 152.9504, GNorm = 0.5189, lr_0 = 6.0883e-04
Loss = 3.9937e-02, PNorm = 152.9905, GNorm = 0.3611, lr_0 = 6.0841e-04
Loss = 4.1732e-02, PNorm = 153.0258, GNorm = 0.6026, lr_0 = 6.0799e-04
Loss = 4.0189e-02, PNorm = 153.0623, GNorm = 0.4259, lr_0 = 6.0758e-04
Loss = 4.1713e-02, PNorm = 153.0955, GNorm = 0.4302, lr_0 = 6.0716e-04
Loss = 3.7950e-02, PNorm = 153.1321, GNorm = 0.3945, lr_0 = 6.0674e-04
Loss = 4.7813e-02, PNorm = 153.1674, GNorm = 0.4112, lr_0 = 6.0633e-04
Loss = 4.7084e-02, PNorm = 153.1995, GNorm = 0.4905, lr_0 = 6.0591e-04
Loss = 5.1439e-02, PNorm = 153.2385, GNorm = 0.3828, lr_0 = 6.0550e-04
Loss = 3.4623e-02, PNorm = 153.2729, GNorm = 0.5559, lr_0 = 6.0508e-04
Loss = 5.0153e-02, PNorm = 153.3107, GNorm = 0.6039, lr_0 = 6.0467e-04
Loss = 5.0368e-02, PNorm = 153.3541, GNorm = 0.4657, lr_0 = 6.0425e-04
Loss = 3.9929e-02, PNorm = 153.3938, GNorm = 0.2828, lr_0 = 6.0384e-04
Loss = 4.5100e-02, PNorm = 153.4380, GNorm = 0.8815, lr_0 = 6.0343e-04
Loss = 4.5083e-02, PNorm = 153.4772, GNorm = 0.9098, lr_0 = 6.0301e-04
Loss = 3.1870e-02, PNorm = 153.5169, GNorm = 0.2625, lr_0 = 6.0260e-04
Loss = 4.0427e-02, PNorm = 153.5563, GNorm = 0.5582, lr_0 = 6.0219e-04
Loss = 4.2307e-02, PNorm = 153.5996, GNorm = 0.7003, lr_0 = 6.0178e-04
Loss = 4.6598e-02, PNorm = 153.6357, GNorm = 0.5276, lr_0 = 6.0136e-04
Loss = 3.6393e-02, PNorm = 153.6741, GNorm = 0.5389, lr_0 = 6.0095e-04
Loss = 3.7605e-02, PNorm = 153.7073, GNorm = 0.4901, lr_0 = 6.0054e-04
Loss = 3.7860e-02, PNorm = 153.7422, GNorm = 0.2544, lr_0 = 6.0013e-04
Loss = 3.7447e-02, PNorm = 153.7720, GNorm = 0.4718, lr_0 = 5.9972e-04
Loss = 3.6453e-02, PNorm = 153.8087, GNorm = 0.4985, lr_0 = 5.9931e-04
Loss = 3.9138e-02, PNorm = 153.8508, GNorm = 0.6706, lr_0 = 5.9890e-04
Loss = 3.5077e-02, PNorm = 153.8905, GNorm = 0.5840, lr_0 = 5.9849e-04
Loss = 3.6801e-02, PNorm = 153.9295, GNorm = 0.3453, lr_0 = 5.9808e-04
Loss = 4.1502e-02, PNorm = 153.9722, GNorm = 1.0069, lr_0 = 5.9767e-04
Loss = 4.0191e-02, PNorm = 154.0148, GNorm = 0.4833, lr_0 = 5.9726e-04
Loss = 3.5115e-02, PNorm = 154.0526, GNorm = 0.5783, lr_0 = 5.9685e-04
Loss = 3.1047e-02, PNorm = 154.0975, GNorm = 0.2757, lr_0 = 5.9644e-04
Loss = 3.6514e-02, PNorm = 154.1416, GNorm = 0.3523, lr_0 = 5.9603e-04
Loss = 4.3885e-02, PNorm = 154.1826, GNorm = 0.5879, lr_0 = 5.9562e-04
Loss = 4.6475e-02, PNorm = 154.2216, GNorm = 0.4795, lr_0 = 5.9521e-04
Loss = 4.2232e-02, PNorm = 154.2616, GNorm = 0.3394, lr_0 = 5.9481e-04
Loss = 3.5650e-02, PNorm = 154.3057, GNorm = 0.3553, lr_0 = 5.9440e-04
Loss = 3.8019e-02, PNorm = 154.3482, GNorm = 0.6261, lr_0 = 5.9399e-04
Loss = 4.0166e-02, PNorm = 154.3861, GNorm = 0.2775, lr_0 = 5.9358e-04
Loss = 4.1748e-02, PNorm = 154.4265, GNorm = 0.6790, lr_0 = 5.9318e-04
Loss = 4.2515e-02, PNorm = 154.4758, GNorm = 0.5506, lr_0 = 5.9277e-04
Loss = 4.7028e-02, PNorm = 154.5200, GNorm = 0.8285, lr_0 = 5.9236e-04
Loss = 3.3778e-02, PNorm = 154.5646, GNorm = 0.2997, lr_0 = 5.9196e-04
Loss = 3.7936e-02, PNorm = 154.6089, GNorm = 0.3343, lr_0 = 5.9155e-04
Loss = 3.6700e-02, PNorm = 154.6525, GNorm = 0.6836, lr_0 = 5.9115e-04
Loss = 4.1358e-02, PNorm = 154.6956, GNorm = 0.8530, lr_0 = 5.9074e-04
Loss = 3.0701e-02, PNorm = 154.7430, GNorm = 0.2717, lr_0 = 5.9034e-04
Loss = 3.8004e-02, PNorm = 154.7828, GNorm = 0.6228, lr_0 = 5.8993e-04
Loss = 4.6378e-02, PNorm = 154.8241, GNorm = 0.4914, lr_0 = 5.8953e-04
Loss = 4.0872e-02, PNorm = 154.8638, GNorm = 0.5213, lr_0 = 5.8913e-04
Loss = 3.7777e-02, PNorm = 154.9036, GNorm = 0.5128, lr_0 = 5.8872e-04
Loss = 3.4519e-02, PNorm = 154.9473, GNorm = 0.2906, lr_0 = 5.8832e-04
Loss = 4.1405e-02, PNorm = 154.9947, GNorm = 0.7235, lr_0 = 5.8792e-04
Loss = 3.8353e-02, PNorm = 155.0380, GNorm = 0.6096, lr_0 = 5.8751e-04
Loss = 3.7630e-02, PNorm = 155.0854, GNorm = 0.3375, lr_0 = 5.8711e-04
Loss = 3.6446e-02, PNorm = 155.1289, GNorm = 0.6550, lr_0 = 5.8671e-04
Loss = 3.6819e-02, PNorm = 155.1700, GNorm = 0.6341, lr_0 = 5.8631e-04
Loss = 3.7702e-02, PNorm = 155.2178, GNorm = 0.8363, lr_0 = 5.8591e-04
Loss = 3.4859e-02, PNorm = 155.2572, GNorm = 0.3167, lr_0 = 5.8550e-04
Loss = 4.0517e-02, PNorm = 155.3025, GNorm = 0.5101, lr_0 = 5.8510e-04
Loss = 4.3824e-02, PNorm = 155.3486, GNorm = 0.5323, lr_0 = 5.8470e-04
Loss = 4.9223e-02, PNorm = 155.3886, GNorm = 0.7534, lr_0 = 5.8430e-04
Loss = 3.1864e-02, PNorm = 155.4400, GNorm = 0.2594, lr_0 = 5.8390e-04
Loss = 3.7801e-02, PNorm = 155.4816, GNorm = 0.5402, lr_0 = 5.8350e-04
Loss = 4.2222e-02, PNorm = 155.5285, GNorm = 0.4175, lr_0 = 5.8310e-04
Loss = 3.3397e-02, PNorm = 155.5755, GNorm = 0.3063, lr_0 = 5.8270e-04
Loss = 4.3205e-02, PNorm = 155.6143, GNorm = 0.6924, lr_0 = 5.8230e-04
Loss = 4.8170e-02, PNorm = 155.6590, GNorm = 0.5518, lr_0 = 5.8190e-04
Loss = 3.8976e-02, PNorm = 155.7006, GNorm = 0.5419, lr_0 = 5.8151e-04
Loss = 4.0719e-02, PNorm = 155.7498, GNorm = 0.3466, lr_0 = 5.8111e-04
Loss = 3.5080e-02, PNorm = 155.8028, GNorm = 0.4339, lr_0 = 5.8071e-04
Loss = 4.0702e-02, PNorm = 155.8549, GNorm = 0.6191, lr_0 = 5.8031e-04
Loss = 4.5110e-02, PNorm = 155.9058, GNorm = 0.5894, lr_0 = 5.7991e-04
Loss = 4.4417e-02, PNorm = 155.9507, GNorm = 0.3771, lr_0 = 5.7952e-04
Loss = 3.2157e-02, PNorm = 155.9993, GNorm = 0.3557, lr_0 = 5.7912e-04
Loss = 3.9705e-02, PNorm = 156.0480, GNorm = 0.4983, lr_0 = 5.7872e-04
Loss = 5.0323e-02, PNorm = 156.0933, GNorm = 0.6697, lr_0 = 5.7833e-04
Loss = 3.6842e-02, PNorm = 156.1404, GNorm = 0.2932, lr_0 = 5.7793e-04
Loss = 4.1207e-02, PNorm = 156.1803, GNorm = 0.4529, lr_0 = 5.7753e-04
Loss = 3.4557e-02, PNorm = 156.2236, GNorm = 0.3947, lr_0 = 5.7714e-04
Loss = 5.0353e-02, PNorm = 156.2675, GNorm = 0.3727, lr_0 = 5.7674e-04
Loss = 5.5885e-02, PNorm = 156.3185, GNorm = 0.9953, lr_0 = 5.7635e-04
Loss = 4.4769e-02, PNorm = 156.3676, GNorm = 0.3691, lr_0 = 5.7595e-04
Loss = 3.4094e-02, PNorm = 156.4122, GNorm = 0.2347, lr_0 = 5.7556e-04
Loss = 4.1237e-02, PNorm = 156.4626, GNorm = 0.6966, lr_0 = 5.7516e-04
Loss = 4.4084e-02, PNorm = 156.5057, GNorm = 0.4295, lr_0 = 5.7477e-04
Loss = 3.6088e-02, PNorm = 156.5561, GNorm = 0.3974, lr_0 = 5.7438e-04
Loss = 5.0343e-02, PNorm = 156.6056, GNorm = 0.6524, lr_0 = 5.7398e-04
Loss = 3.6559e-02, PNorm = 156.6574, GNorm = 0.5578, lr_0 = 5.7359e-04
Loss = 3.7809e-02, PNorm = 156.7066, GNorm = 1.0658, lr_0 = 5.7320e-04
Loss = 3.3115e-02, PNorm = 156.7539, GNorm = 0.2689, lr_0 = 5.7280e-04
Loss = 3.8237e-02, PNorm = 156.8042, GNorm = 0.3941, lr_0 = 5.7241e-04
Loss = 3.2755e-02, PNorm = 156.8512, GNorm = 0.2693, lr_0 = 5.7202e-04
Loss = 3.0409e-02, PNorm = 156.9008, GNorm = 0.2779, lr_0 = 5.7163e-04
Loss = 4.6523e-02, PNorm = 156.9434, GNorm = 0.6150, lr_0 = 5.7124e-04
Loss = 4.5349e-02, PNorm = 156.9922, GNorm = 0.4279, lr_0 = 5.7084e-04
Loss = 3.9109e-02, PNorm = 157.0364, GNorm = 0.4683, lr_0 = 5.7045e-04
Loss = 5.3200e-02, PNorm = 157.0854, GNorm = 0.4471, lr_0 = 5.7006e-04
Loss = 4.4773e-02, PNorm = 157.1428, GNorm = 0.3038, lr_0 = 5.6967e-04
Loss = 4.9318e-02, PNorm = 157.1945, GNorm = 0.4230, lr_0 = 5.6928e-04
Loss = 4.2609e-02, PNorm = 157.2370, GNorm = 0.4624, lr_0 = 5.6889e-04
Loss = 4.7163e-02, PNorm = 157.2863, GNorm = 0.4514, lr_0 = 5.6850e-04
Loss = 3.8838e-02, PNorm = 157.3337, GNorm = 0.5678, lr_0 = 5.6811e-04
Loss = 3.6190e-02, PNorm = 157.3807, GNorm = 0.6267, lr_0 = 5.6772e-04
Loss = 4.2469e-02, PNorm = 157.4247, GNorm = 0.6135, lr_0 = 5.6733e-04
Loss = 3.3649e-02, PNorm = 157.4707, GNorm = 0.3647, lr_0 = 5.6695e-04
Loss = 3.9248e-02, PNorm = 157.5123, GNorm = 0.3166, lr_0 = 5.6656e-04
Loss = 3.9061e-02, PNorm = 157.5538, GNorm = 0.4316, lr_0 = 5.6617e-04
Loss = 4.4374e-02, PNorm = 157.6013, GNorm = 0.4633, lr_0 = 5.6578e-04
Loss = 4.4714e-02, PNorm = 157.6492, GNorm = 0.3680, lr_0 = 5.6539e-04
Loss = 5.1287e-02, PNorm = 157.7063, GNorm = 0.8965, lr_0 = 5.6501e-04
Loss = 4.8434e-02, PNorm = 157.7587, GNorm = 0.5927, lr_0 = 5.6462e-04
Loss = 3.9477e-02, PNorm = 157.8146, GNorm = 0.4982, lr_0 = 5.6423e-04
Loss = 4.1120e-02, PNorm = 157.8649, GNorm = 0.4276, lr_0 = 5.6385e-04
Loss = 4.0450e-02, PNorm = 157.9142, GNorm = 0.8190, lr_0 = 5.6346e-04
Loss = 5.5397e-02, PNorm = 157.9670, GNorm = 0.9868, lr_0 = 5.6307e-04
Loss = 5.4150e-02, PNorm = 158.0230, GNorm = 0.5807, lr_0 = 5.6269e-04
Loss = 4.4533e-02, PNorm = 158.0853, GNorm = 0.3586, lr_0 = 5.6230e-04
Validation mae = 0.122696
Epoch 9
Loss = 3.8531e-02, PNorm = 158.1333, GNorm = 0.4380, lr_0 = 5.6192e-04
Loss = 3.0204e-02, PNorm = 158.1752, GNorm = 0.2460, lr_0 = 5.6153e-04
Loss = 2.8884e-02, PNorm = 158.2094, GNorm = 0.4753, lr_0 = 5.6115e-04
Loss = 3.8236e-02, PNorm = 158.2407, GNorm = 0.4392, lr_0 = 5.6076e-04
Loss = 3.1920e-02, PNorm = 158.2759, GNorm = 0.2662, lr_0 = 5.6038e-04
Loss = 4.2464e-02, PNorm = 158.3120, GNorm = 0.2457, lr_0 = 5.6000e-04
Loss = 3.7707e-02, PNorm = 158.3462, GNorm = 0.8012, lr_0 = 5.5961e-04
Loss = 2.9506e-02, PNorm = 158.3781, GNorm = 0.3149, lr_0 = 5.5923e-04
Loss = 3.3784e-02, PNorm = 158.4105, GNorm = 0.5265, lr_0 = 5.5885e-04
Loss = 4.1982e-02, PNorm = 158.4450, GNorm = 0.3011, lr_0 = 5.5846e-04
Loss = 2.9651e-02, PNorm = 158.4813, GNorm = 0.3316, lr_0 = 5.5808e-04
Loss = 3.5814e-02, PNorm = 158.5176, GNorm = 0.5173, lr_0 = 5.5770e-04
Loss = 3.7031e-02, PNorm = 158.5483, GNorm = 0.4463, lr_0 = 5.5732e-04
Loss = 4.1378e-02, PNorm = 158.5840, GNorm = 1.8383, lr_0 = 5.5693e-04
Loss = 2.7032e-02, PNorm = 158.6138, GNorm = 0.4698, lr_0 = 5.5655e-04
Loss = 3.0556e-02, PNorm = 158.6499, GNorm = 0.4063, lr_0 = 5.5617e-04
Loss = 3.9127e-02, PNorm = 158.6855, GNorm = 0.5832, lr_0 = 5.5579e-04
Loss = 3.2594e-02, PNorm = 158.7211, GNorm = 0.4139, lr_0 = 5.5541e-04
Loss = 3.0416e-02, PNorm = 158.7639, GNorm = 0.3956, lr_0 = 5.5503e-04
Loss = 3.3110e-02, PNorm = 158.7978, GNorm = 0.4067, lr_0 = 5.5465e-04
Loss = 3.2979e-02, PNorm = 158.8324, GNorm = 0.3624, lr_0 = 5.5427e-04
Loss = 3.4577e-02, PNorm = 158.8687, GNorm = 0.4419, lr_0 = 5.5389e-04
Loss = 3.0945e-02, PNorm = 158.9016, GNorm = 0.2524, lr_0 = 5.5351e-04
Loss = 2.8224e-02, PNorm = 158.9383, GNorm = 0.3699, lr_0 = 5.5313e-04
Loss = 2.8298e-02, PNorm = 158.9745, GNorm = 0.4253, lr_0 = 5.5275e-04
Loss = 2.8395e-02, PNorm = 159.0130, GNorm = 0.4806, lr_0 = 5.5237e-04
Loss = 3.1330e-02, PNorm = 159.0464, GNorm = 0.3313, lr_0 = 5.5199e-04
Loss = 3.4894e-02, PNorm = 159.0780, GNorm = 0.3118, lr_0 = 5.5162e-04
Loss = 3.5181e-02, PNorm = 159.1117, GNorm = 0.3289, lr_0 = 5.5124e-04
Loss = 2.9930e-02, PNorm = 159.1464, GNorm = 0.6246, lr_0 = 5.5086e-04
Loss = 3.2731e-02, PNorm = 159.1862, GNorm = 0.4456, lr_0 = 5.5048e-04
Loss = 2.8224e-02, PNorm = 159.2211, GNorm = 0.3996, lr_0 = 5.5011e-04
Loss = 2.6808e-02, PNorm = 159.2560, GNorm = 0.4598, lr_0 = 5.4973e-04
Loss = 3.7645e-02, PNorm = 159.2871, GNorm = 0.2708, lr_0 = 5.4935e-04
Loss = 2.7336e-02, PNorm = 159.3179, GNorm = 0.3276, lr_0 = 5.4898e-04
Loss = 2.5433e-02, PNorm = 159.3539, GNorm = 0.5554, lr_0 = 5.4860e-04
Loss = 2.8572e-02, PNorm = 159.3922, GNorm = 0.5309, lr_0 = 5.4822e-04
Loss = 2.9443e-02, PNorm = 159.4267, GNorm = 0.4959, lr_0 = 5.4785e-04
Loss = 3.0475e-02, PNorm = 159.4609, GNorm = 0.5761, lr_0 = 5.4747e-04
Loss = 3.1754e-02, PNorm = 159.5009, GNorm = 0.5679, lr_0 = 5.4710e-04
Loss = 3.1323e-02, PNorm = 159.5395, GNorm = 0.3164, lr_0 = 5.4672e-04
Loss = 3.4488e-02, PNorm = 159.5783, GNorm = 0.3112, lr_0 = 5.4635e-04
Loss = 3.5538e-02, PNorm = 159.6187, GNorm = 2.7876, lr_0 = 5.4597e-04
Loss = 2.8387e-02, PNorm = 159.6441, GNorm = 0.8737, lr_0 = 5.4560e-04
Loss = 3.4773e-02, PNorm = 159.6847, GNorm = 0.4003, lr_0 = 5.4523e-04
Loss = 3.6562e-02, PNorm = 159.7192, GNorm = 0.2630, lr_0 = 5.4485e-04
Loss = 2.9836e-02, PNorm = 159.7587, GNorm = 0.3175, lr_0 = 5.4448e-04
Loss = 3.7385e-02, PNorm = 159.8002, GNorm = 0.2974, lr_0 = 5.4411e-04
Loss = 3.6353e-02, PNorm = 159.8403, GNorm = 0.3133, lr_0 = 5.4373e-04
Loss = 3.7973e-02, PNorm = 159.8797, GNorm = 0.5963, lr_0 = 5.4336e-04
Loss = 2.8050e-02, PNorm = 159.9198, GNorm = 0.4511, lr_0 = 5.4299e-04
Loss = 3.2140e-02, PNorm = 159.9553, GNorm = 0.3345, lr_0 = 5.4262e-04
Loss = 3.2511e-02, PNorm = 159.9948, GNorm = 0.4658, lr_0 = 5.4225e-04
Loss = 3.2313e-02, PNorm = 160.0353, GNorm = 0.1903, lr_0 = 5.4187e-04
Loss = 3.2011e-02, PNorm = 160.0742, GNorm = 0.3657, lr_0 = 5.4150e-04
Loss = 4.5749e-02, PNorm = 160.1184, GNorm = 0.2339, lr_0 = 5.4113e-04
Loss = 2.5243e-02, PNorm = 160.1644, GNorm = 0.1912, lr_0 = 5.4076e-04
Loss = 3.4731e-02, PNorm = 160.1995, GNorm = 0.5832, lr_0 = 5.4039e-04
Loss = 3.1088e-02, PNorm = 160.2400, GNorm = 0.3708, lr_0 = 5.4002e-04
Loss = 3.0436e-02, PNorm = 160.2821, GNorm = 0.2910, lr_0 = 5.3965e-04
Loss = 3.2260e-02, PNorm = 160.3239, GNorm = 0.3445, lr_0 = 5.3928e-04
Loss = 2.6136e-02, PNorm = 160.3635, GNorm = 0.5150, lr_0 = 5.3891e-04
Loss = 2.8639e-02, PNorm = 160.4036, GNorm = 0.6246, lr_0 = 5.3854e-04
Loss = 3.2446e-02, PNorm = 160.4391, GNorm = 0.2484, lr_0 = 5.3817e-04
Loss = 2.9748e-02, PNorm = 160.4750, GNorm = 0.2239, lr_0 = 5.3781e-04
Loss = 2.9852e-02, PNorm = 160.5130, GNorm = 0.2629, lr_0 = 5.3744e-04
Loss = 3.6457e-02, PNorm = 160.5491, GNorm = 0.3949, lr_0 = 5.3707e-04
Loss = 3.4072e-02, PNorm = 160.5894, GNorm = 0.3582, lr_0 = 5.3670e-04
Loss = 3.1737e-02, PNorm = 160.6288, GNorm = 0.3116, lr_0 = 5.3633e-04
Loss = 3.6817e-02, PNorm = 160.6679, GNorm = 0.6026, lr_0 = 5.3597e-04
Loss = 3.3682e-02, PNorm = 160.7111, GNorm = 0.5400, lr_0 = 5.3560e-04
Loss = 2.9430e-02, PNorm = 160.7517, GNorm = 0.1843, lr_0 = 5.3523e-04
Loss = 5.2838e-02, PNorm = 160.7935, GNorm = 0.6700, lr_0 = 5.3486e-04
Loss = 3.6453e-02, PNorm = 160.8357, GNorm = 0.3245, lr_0 = 5.3450e-04
Loss = 3.5918e-02, PNorm = 160.8751, GNorm = 0.3121, lr_0 = 5.3413e-04
Loss = 3.7708e-02, PNorm = 160.9160, GNorm = 0.5022, lr_0 = 5.3377e-04
Loss = 3.4060e-02, PNorm = 160.9554, GNorm = 0.4667, lr_0 = 5.3340e-04
Loss = 3.8424e-02, PNorm = 160.9969, GNorm = 0.3054, lr_0 = 5.3304e-04
Loss = 3.2872e-02, PNorm = 161.0414, GNorm = 0.4935, lr_0 = 5.3267e-04
Loss = 3.1747e-02, PNorm = 161.0812, GNorm = 0.3705, lr_0 = 5.3231e-04
Loss = 3.0254e-02, PNorm = 161.1197, GNorm = 0.4683, lr_0 = 5.3194e-04
Loss = 3.9250e-02, PNorm = 161.1527, GNorm = 0.8074, lr_0 = 5.3158e-04
Loss = 3.2860e-02, PNorm = 161.1953, GNorm = 0.3877, lr_0 = 5.3121e-04
Loss = 3.3698e-02, PNorm = 161.2368, GNorm = 0.5225, lr_0 = 5.3085e-04
Loss = 4.6688e-02, PNorm = 161.2785, GNorm = 0.4498, lr_0 = 5.3048e-04
Loss = 3.2861e-02, PNorm = 161.3267, GNorm = 0.3113, lr_0 = 5.3012e-04
Loss = 2.9014e-02, PNorm = 161.3697, GNorm = 0.2727, lr_0 = 5.2976e-04
Loss = 3.5608e-02, PNorm = 161.4086, GNorm = 0.3285, lr_0 = 5.2939e-04
Loss = 2.5473e-02, PNorm = 161.4506, GNorm = 0.3626, lr_0 = 5.2903e-04
Loss = 3.0645e-02, PNorm = 161.4937, GNorm = 0.5872, lr_0 = 5.2867e-04
Loss = 3.8645e-02, PNorm = 161.5343, GNorm = 0.4197, lr_0 = 5.2831e-04
Loss = 2.8456e-02, PNorm = 161.5763, GNorm = 0.3226, lr_0 = 5.2795e-04
Loss = 3.2760e-02, PNorm = 161.6149, GNorm = 0.5310, lr_0 = 5.2758e-04
Loss = 3.1445e-02, PNorm = 161.6546, GNorm = 0.3360, lr_0 = 5.2722e-04
Loss = 2.9128e-02, PNorm = 161.6921, GNorm = 0.4113, lr_0 = 5.2686e-04
Loss = 3.2756e-02, PNorm = 161.7346, GNorm = 0.2858, lr_0 = 5.2650e-04
Loss = 5.2136e-02, PNorm = 161.7782, GNorm = 0.6427, lr_0 = 5.2614e-04
Loss = 2.8126e-02, PNorm = 161.8261, GNorm = 0.5665, lr_0 = 5.2578e-04
Loss = 3.9253e-02, PNorm = 161.8710, GNorm = 0.5334, lr_0 = 5.2542e-04
Loss = 4.0844e-02, PNorm = 161.9121, GNorm = 0.6565, lr_0 = 5.2506e-04
Loss = 3.5224e-02, PNorm = 161.9560, GNorm = 0.4680, lr_0 = 5.2470e-04
Loss = 2.9160e-02, PNorm = 161.9960, GNorm = 0.5816, lr_0 = 5.2434e-04
Loss = 2.9950e-02, PNorm = 162.0371, GNorm = 0.7740, lr_0 = 5.2398e-04
Loss = 3.1898e-02, PNorm = 162.0746, GNorm = 0.3619, lr_0 = 5.2362e-04
Loss = 2.6369e-02, PNorm = 162.1160, GNorm = 0.3157, lr_0 = 5.2326e-04
Loss = 2.9976e-02, PNorm = 162.1526, GNorm = 0.3449, lr_0 = 5.2290e-04
Loss = 3.2780e-02, PNorm = 162.1906, GNorm = 0.3230, lr_0 = 5.2255e-04
Loss = 2.8388e-02, PNorm = 162.2312, GNorm = 0.3956, lr_0 = 5.2219e-04
Loss = 3.2724e-02, PNorm = 162.2716, GNorm = 1.4148, lr_0 = 5.2183e-04
Loss = 3.1382e-02, PNorm = 162.3113, GNorm = 0.3007, lr_0 = 5.2147e-04
Loss = 3.7010e-02, PNorm = 162.3550, GNorm = 0.2524, lr_0 = 5.2112e-04
Loss = 3.6463e-02, PNorm = 162.3947, GNorm = 0.5611, lr_0 = 5.2076e-04
Loss = 3.5070e-02, PNorm = 162.4322, GNorm = 0.3394, lr_0 = 5.2040e-04
Loss = 3.6031e-02, PNorm = 162.4730, GNorm = 0.2419, lr_0 = 5.2005e-04
Loss = 3.7926e-02, PNorm = 162.5118, GNorm = 0.3409, lr_0 = 5.1969e-04
Loss = 2.8896e-02, PNorm = 162.5521, GNorm = 0.5079, lr_0 = 5.1933e-04
Loss = 3.1876e-02, PNorm = 162.5952, GNorm = 1.0541, lr_0 = 5.1898e-04
Loss = 3.2652e-02, PNorm = 162.6396, GNorm = 0.3581, lr_0 = 5.1862e-04
Loss = 3.0865e-02, PNorm = 162.6797, GNorm = 0.4784, lr_0 = 5.1827e-04
Loss = 4.1001e-02, PNorm = 162.7179, GNorm = 0.3155, lr_0 = 5.1791e-04
Validation mae = 0.123004
Epoch 10
Loss = 3.0842e-02, PNorm = 162.7524, GNorm = 0.4958, lr_0 = 5.1756e-04
Loss = 2.5244e-02, PNorm = 162.7813, GNorm = 0.4263, lr_0 = 5.1720e-04
Loss = 2.9926e-02, PNorm = 162.8075, GNorm = 0.3953, lr_0 = 5.1685e-04
Loss = 3.2512e-02, PNorm = 162.8364, GNorm = 0.6277, lr_0 = 5.1649e-04
Loss = 2.7590e-02, PNorm = 162.8640, GNorm = 0.2781, lr_0 = 5.1614e-04
Loss = 3.2601e-02, PNorm = 162.8904, GNorm = 0.7848, lr_0 = 5.1579e-04
Loss = 2.8727e-02, PNorm = 162.9180, GNorm = 0.4343, lr_0 = 5.1543e-04
Loss = 2.2709e-02, PNorm = 162.9404, GNorm = 0.2656, lr_0 = 5.1508e-04
Loss = 2.1954e-02, PNorm = 162.9677, GNorm = 0.1482, lr_0 = 5.1473e-04
Loss = 2.4355e-02, PNorm = 162.9896, GNorm = 0.3691, lr_0 = 5.1437e-04
Loss = 2.5312e-02, PNorm = 163.0174, GNorm = 0.3280, lr_0 = 5.1402e-04
Loss = 2.8952e-02, PNorm = 163.0504, GNorm = 0.3038, lr_0 = 5.1367e-04
Loss = 2.3913e-02, PNorm = 163.0820, GNorm = 0.3008, lr_0 = 5.1332e-04
Loss = 2.5875e-02, PNorm = 163.1131, GNorm = 0.6920, lr_0 = 5.1297e-04
Loss = 2.2306e-02, PNorm = 163.1429, GNorm = 0.2546, lr_0 = 5.1262e-04
Loss = 2.8429e-02, PNorm = 163.1739, GNorm = 0.3659, lr_0 = 5.1226e-04
Loss = 2.9362e-02, PNorm = 163.2016, GNorm = 0.6750, lr_0 = 5.1191e-04
Loss = 2.3931e-02, PNorm = 163.2323, GNorm = 0.3976, lr_0 = 5.1156e-04
Loss = 2.0799e-02, PNorm = 163.2607, GNorm = 0.2189, lr_0 = 5.1121e-04
Loss = 2.9803e-02, PNorm = 163.2851, GNorm = 0.4147, lr_0 = 5.1086e-04
Loss = 2.8432e-02, PNorm = 163.3123, GNorm = 0.3195, lr_0 = 5.1051e-04
Loss = 2.7382e-02, PNorm = 163.3386, GNorm = 0.3355, lr_0 = 5.1016e-04
Loss = 2.7212e-02, PNorm = 163.3687, GNorm = 0.4776, lr_0 = 5.0981e-04
Loss = 3.0018e-02, PNorm = 163.3997, GNorm = 1.1278, lr_0 = 5.0946e-04
Loss = 3.2352e-02, PNorm = 163.4320, GNorm = 0.5260, lr_0 = 5.0911e-04
Loss = 2.4934e-02, PNorm = 163.4618, GNorm = 0.2478, lr_0 = 5.0877e-04
Loss = 3.0660e-02, PNorm = 163.4928, GNorm = 0.4309, lr_0 = 5.0842e-04
Loss = 2.8464e-02, PNorm = 163.5267, GNorm = 0.3706, lr_0 = 5.0807e-04
Loss = 2.4781e-02, PNorm = 163.5566, GNorm = 1.4690, lr_0 = 5.0772e-04
Loss = 2.6851e-02, PNorm = 163.5835, GNorm = 0.5298, lr_0 = 5.0737e-04
Loss = 2.2493e-02, PNorm = 163.6131, GNorm = 0.2165, lr_0 = 5.0703e-04
Loss = 4.0498e-02, PNorm = 163.6433, GNorm = 0.3366, lr_0 = 5.0668e-04
Loss = 2.1732e-02, PNorm = 163.6717, GNorm = 0.2872, lr_0 = 5.0633e-04
Loss = 2.9442e-02, PNorm = 163.7029, GNorm = 0.3069, lr_0 = 5.0598e-04
Loss = 3.0138e-02, PNorm = 163.7321, GNorm = 0.6283, lr_0 = 5.0564e-04
Loss = 2.9260e-02, PNorm = 163.7617, GNorm = 0.6227, lr_0 = 5.0529e-04
Loss = 2.0606e-02, PNorm = 163.7883, GNorm = 0.4158, lr_0 = 5.0494e-04
Loss = 2.7854e-02, PNorm = 163.8129, GNorm = 0.3753, lr_0 = 5.0460e-04
Loss = 2.6313e-02, PNorm = 163.8450, GNorm = 0.2929, lr_0 = 5.0425e-04
Loss = 2.5860e-02, PNorm = 163.8804, GNorm = 0.2974, lr_0 = 5.0391e-04
Loss = 2.3319e-02, PNorm = 163.9156, GNorm = 0.2116, lr_0 = 5.0356e-04
Loss = 2.6122e-02, PNorm = 163.9453, GNorm = 0.3706, lr_0 = 5.0322e-04
Loss = 2.3995e-02, PNorm = 163.9757, GNorm = 0.4470, lr_0 = 5.0287e-04
Loss = 2.6900e-02, PNorm = 164.0062, GNorm = 0.4136, lr_0 = 5.0253e-04
Loss = 2.3282e-02, PNorm = 164.0330, GNorm = 0.3027, lr_0 = 5.0218e-04
Loss = 3.1098e-02, PNorm = 164.0660, GNorm = 0.7842, lr_0 = 5.0184e-04
Loss = 3.4588e-02, PNorm = 164.1037, GNorm = 0.7615, lr_0 = 5.0150e-04
Loss = 2.2843e-02, PNorm = 164.1428, GNorm = 0.4047, lr_0 = 5.0115e-04
Loss = 2.8560e-02, PNorm = 164.1771, GNorm = 0.5103, lr_0 = 5.0081e-04
Loss = 3.9568e-02, PNorm = 164.2109, GNorm = 0.8508, lr_0 = 5.0047e-04
Loss = 2.9632e-02, PNorm = 164.2465, GNorm = 0.2287, lr_0 = 5.0012e-04
Loss = 3.5127e-02, PNorm = 164.2808, GNorm = 0.2531, lr_0 = 4.9978e-04
Loss = 2.3575e-02, PNorm = 164.3108, GNorm = 0.2204, lr_0 = 4.9944e-04
Loss = 2.8805e-02, PNorm = 164.3427, GNorm = 0.4672, lr_0 = 4.9910e-04
Loss = 2.6344e-02, PNorm = 164.3774, GNorm = 0.3100, lr_0 = 4.9875e-04
Loss = 2.6879e-02, PNorm = 164.4122, GNorm = 0.7895, lr_0 = 4.9841e-04
Loss = 2.3386e-02, PNorm = 164.4485, GNorm = 0.2291, lr_0 = 4.9807e-04
Loss = 2.5307e-02, PNorm = 164.4883, GNorm = 0.2910, lr_0 = 4.9773e-04
Loss = 3.9625e-02, PNorm = 164.5116, GNorm = 0.5614, lr_0 = 4.9739e-04
Loss = 2.5404e-02, PNorm = 164.5437, GNorm = 0.5370, lr_0 = 4.9705e-04
Loss = 2.8803e-02, PNorm = 164.5766, GNorm = 0.7350, lr_0 = 4.9671e-04
Loss = 2.7642e-02, PNorm = 164.6083, GNorm = 0.2303, lr_0 = 4.9637e-04
Loss = 2.1131e-02, PNorm = 164.6410, GNorm = 0.3680, lr_0 = 4.9603e-04
Loss = 2.5575e-02, PNorm = 164.6766, GNorm = 0.5380, lr_0 = 4.9569e-04
Loss = 2.6352e-02, PNorm = 164.7123, GNorm = 0.2702, lr_0 = 4.9535e-04
Loss = 3.2775e-02, PNorm = 164.7480, GNorm = 0.3790, lr_0 = 4.9501e-04
Loss = 2.3576e-02, PNorm = 164.7796, GNorm = 0.4181, lr_0 = 4.9467e-04
Loss = 2.8444e-02, PNorm = 164.8133, GNorm = 0.7648, lr_0 = 4.9433e-04
Loss = 2.7326e-02, PNorm = 164.8514, GNorm = 0.3113, lr_0 = 4.9399e-04
Loss = 2.6048e-02, PNorm = 164.8841, GNorm = 0.2699, lr_0 = 4.9365e-04
Loss = 2.9961e-02, PNorm = 164.9170, GNorm = 0.3135, lr_0 = 4.9332e-04
Loss = 2.0818e-02, PNorm = 164.9498, GNorm = 0.3626, lr_0 = 4.9298e-04
Loss = 2.6252e-02, PNorm = 164.9804, GNorm = 0.2978, lr_0 = 4.9264e-04
Loss = 2.9892e-02, PNorm = 165.0146, GNorm = 0.2403, lr_0 = 4.9230e-04
Loss = 2.4472e-02, PNorm = 165.0492, GNorm = 0.3890, lr_0 = 4.9197e-04
Loss = 2.5852e-02, PNorm = 165.0835, GNorm = 0.2287, lr_0 = 4.9163e-04
Loss = 3.4063e-02, PNorm = 165.1216, GNorm = 0.2239, lr_0 = 4.9129e-04
Loss = 2.3974e-02, PNorm = 165.1569, GNorm = 0.1988, lr_0 = 4.9095e-04
Loss = 2.4219e-02, PNorm = 165.1898, GNorm = 0.5008, lr_0 = 4.9062e-04
Loss = 2.8816e-02, PNorm = 165.2240, GNorm = 0.6863, lr_0 = 4.9028e-04
Loss = 2.5225e-02, PNorm = 165.2578, GNorm = 0.2266, lr_0 = 4.8995e-04
Loss = 2.4178e-02, PNorm = 165.2968, GNorm = 0.2435, lr_0 = 4.8961e-04
Loss = 2.5194e-02, PNorm = 165.3339, GNorm = 0.4198, lr_0 = 4.8928e-04
Loss = 3.5767e-02, PNorm = 165.3651, GNorm = 0.7131, lr_0 = 4.8894e-04
Loss = 3.0216e-02, PNorm = 165.4047, GNorm = 0.4528, lr_0 = 4.8861e-04
Loss = 2.2702e-02, PNorm = 165.4397, GNorm = 0.2535, lr_0 = 4.8827e-04
Loss = 3.0513e-02, PNorm = 165.4737, GNorm = 0.6949, lr_0 = 4.8794e-04
Loss = 3.1189e-02, PNorm = 165.5096, GNorm = 0.4442, lr_0 = 4.8760e-04
Loss = 2.9136e-02, PNorm = 165.5464, GNorm = 0.3422, lr_0 = 4.8727e-04
Loss = 2.4862e-02, PNorm = 165.5856, GNorm = 0.4750, lr_0 = 4.8693e-04
Loss = 2.4484e-02, PNorm = 165.6183, GNorm = 0.3806, lr_0 = 4.8660e-04
Loss = 2.3371e-02, PNorm = 165.6529, GNorm = 0.2280, lr_0 = 4.8627e-04
Loss = 2.7434e-02, PNorm = 165.6910, GNorm = 0.2867, lr_0 = 4.8593e-04
Loss = 3.5429e-02, PNorm = 165.7261, GNorm = 0.3140, lr_0 = 4.8560e-04
Loss = 3.5093e-02, PNorm = 165.7609, GNorm = 0.6944, lr_0 = 4.8527e-04
Loss = 2.6900e-02, PNorm = 165.7953, GNorm = 0.3060, lr_0 = 4.8494e-04
Loss = 3.1638e-02, PNorm = 165.8300, GNorm = 0.4844, lr_0 = 4.8460e-04
Loss = 2.7696e-02, PNorm = 165.8624, GNorm = 0.6359, lr_0 = 4.8427e-04
Loss = 2.5183e-02, PNorm = 165.8972, GNorm = 0.4293, lr_0 = 4.8394e-04
Loss = 2.9268e-02, PNorm = 165.9347, GNorm = 0.3988, lr_0 = 4.8361e-04
Loss = 2.3071e-02, PNorm = 165.9695, GNorm = 0.2853, lr_0 = 4.8328e-04
Loss = 3.3697e-02, PNorm = 166.0037, GNorm = 0.4971, lr_0 = 4.8295e-04
Loss = 3.2997e-02, PNorm = 166.0419, GNorm = 0.2358, lr_0 = 4.8262e-04
Loss = 3.1395e-02, PNorm = 166.0742, GNorm = 0.7497, lr_0 = 4.8228e-04
Loss = 2.4989e-02, PNorm = 166.1118, GNorm = 0.5291, lr_0 = 4.8195e-04
Loss = 2.7131e-02, PNorm = 166.1470, GNorm = 0.4430, lr_0 = 4.8162e-04
Loss = 2.6617e-02, PNorm = 166.1805, GNorm = 0.3434, lr_0 = 4.8129e-04
Loss = 2.4623e-02, PNorm = 166.2164, GNorm = 0.7783, lr_0 = 4.8096e-04
Loss = 2.8861e-02, PNorm = 166.2503, GNorm = 0.4023, lr_0 = 4.8064e-04
Loss = 2.8836e-02, PNorm = 166.2852, GNorm = 0.2856, lr_0 = 4.8031e-04
Loss = 2.8680e-02, PNorm = 166.3224, GNorm = 0.5151, lr_0 = 4.7998e-04
Loss = 2.4816e-02, PNorm = 166.3579, GNorm = 0.3092, lr_0 = 4.7965e-04
Loss = 2.6170e-02, PNorm = 166.3890, GNorm = 0.2531, lr_0 = 4.7932e-04
Loss = 3.3065e-02, PNorm = 166.4212, GNorm = 0.2422, lr_0 = 4.7899e-04
Loss = 3.1457e-02, PNorm = 166.4556, GNorm = 0.2575, lr_0 = 4.7866e-04
Loss = 2.6274e-02, PNorm = 166.4905, GNorm = 0.5243, lr_0 = 4.7833e-04
Loss = 3.2610e-02, PNorm = 166.5212, GNorm = 0.3516, lr_0 = 4.7801e-04
Loss = 2.6950e-02, PNorm = 166.5572, GNorm = 0.2876, lr_0 = 4.7768e-04
Loss = 2.8782e-02, PNorm = 166.5936, GNorm = 0.2820, lr_0 = 4.7735e-04
Loss = 2.4324e-02, PNorm = 166.6305, GNorm = 0.7752, lr_0 = 4.7703e-04
Validation mae = 0.122805
Epoch 11
Loss = 2.8318e-02, PNorm = 166.6642, GNorm = 0.3777, lr_0 = 4.7670e-04
Loss = 3.5285e-02, PNorm = 166.6924, GNorm = 0.4297, lr_0 = 4.7637e-04
Loss = 3.0380e-02, PNorm = 166.7241, GNorm = 0.7157, lr_0 = 4.7605e-04
Loss = 2.4647e-02, PNorm = 166.7540, GNorm = 0.2965, lr_0 = 4.7572e-04
Loss = 2.3259e-02, PNorm = 166.7798, GNorm = 0.3279, lr_0 = 4.7539e-04
Loss = 2.6026e-02, PNorm = 166.8041, GNorm = 0.2412, lr_0 = 4.7507e-04
Loss = 2.5008e-02, PNorm = 166.8291, GNorm = 0.3984, lr_0 = 4.7474e-04
Loss = 2.2692e-02, PNorm = 166.8553, GNorm = 0.2185, lr_0 = 4.7442e-04
Loss = 1.8290e-02, PNorm = 166.8814, GNorm = 0.2006, lr_0 = 4.7409e-04
Loss = 2.2539e-02, PNorm = 166.9064, GNorm = 0.2379, lr_0 = 4.7377e-04
Loss = 2.3235e-02, PNorm = 166.9297, GNorm = 0.3173, lr_0 = 4.7344e-04
Loss = 2.9138e-02, PNorm = 166.9493, GNorm = 0.5756, lr_0 = 4.7312e-04
Loss = 2.1576e-02, PNorm = 166.9738, GNorm = 0.3668, lr_0 = 4.7279e-04
Loss = 3.4073e-02, PNorm = 167.0008, GNorm = 0.4254, lr_0 = 4.7247e-04
Loss = 4.0294e-02, PNorm = 167.0201, GNorm = 2.6731, lr_0 = 4.7215e-04
Loss = 2.0650e-02, PNorm = 167.0409, GNorm = 0.4613, lr_0 = 4.7182e-04
Loss = 2.9019e-02, PNorm = 167.0712, GNorm = 0.5970, lr_0 = 4.7150e-04
Loss = 2.1660e-02, PNorm = 167.1005, GNorm = 0.2856, lr_0 = 4.7118e-04
Loss = 2.0109e-02, PNorm = 167.1280, GNorm = 0.5083, lr_0 = 4.7085e-04
Loss = 1.9956e-02, PNorm = 167.1576, GNorm = 0.2714, lr_0 = 4.7053e-04
Loss = 2.1444e-02, PNorm = 167.1783, GNorm = 0.5155, lr_0 = 4.7021e-04
Loss = 2.1565e-02, PNorm = 167.2017, GNorm = 0.4281, lr_0 = 4.6989e-04
Loss = 2.3651e-02, PNorm = 167.2301, GNorm = 0.3252, lr_0 = 4.6957e-04
Loss = 1.9345e-02, PNorm = 167.2559, GNorm = 0.4778, lr_0 = 4.6924e-04
Loss = 2.3135e-02, PNorm = 167.2818, GNorm = 0.2035, lr_0 = 4.6892e-04
Loss = 2.4008e-02, PNorm = 167.3046, GNorm = 0.3069, lr_0 = 4.6860e-04
Loss = 1.9807e-02, PNorm = 167.3266, GNorm = 0.4898, lr_0 = 4.6828e-04
Loss = 2.7404e-02, PNorm = 167.3512, GNorm = 0.4112, lr_0 = 4.6796e-04
Loss = 2.4167e-02, PNorm = 167.3746, GNorm = 0.6021, lr_0 = 4.6764e-04
Loss = 2.0621e-02, PNorm = 167.3984, GNorm = 0.2571, lr_0 = 4.6732e-04
Loss = 2.6692e-02, PNorm = 167.4230, GNorm = 0.2641, lr_0 = 4.6700e-04
Loss = 2.5971e-02, PNorm = 167.4496, GNorm = 0.3003, lr_0 = 4.6668e-04
Loss = 2.0370e-02, PNorm = 167.4739, GNorm = 0.3485, lr_0 = 4.6636e-04
Loss = 2.2807e-02, PNorm = 167.4991, GNorm = 0.6683, lr_0 = 4.6604e-04
Loss = 1.9831e-02, PNorm = 167.5238, GNorm = 0.3170, lr_0 = 4.6572e-04
Loss = 2.4270e-02, PNorm = 167.5446, GNorm = 0.3222, lr_0 = 4.6540e-04
Loss = 2.0779e-02, PNorm = 167.5676, GNorm = 0.3462, lr_0 = 4.6508e-04
Loss = 2.9728e-02, PNorm = 167.5953, GNorm = 0.5475, lr_0 = 4.6476e-04
Loss = 2.9254e-02, PNorm = 167.6237, GNorm = 0.5419, lr_0 = 4.6445e-04
Loss = 2.3991e-02, PNorm = 167.6529, GNorm = 0.3165, lr_0 = 4.6413e-04
Loss = 2.0054e-02, PNorm = 167.6794, GNorm = 0.3896, lr_0 = 4.6381e-04
Loss = 2.4745e-02, PNorm = 167.7059, GNorm = 0.8506, lr_0 = 4.6349e-04
Loss = 1.6328e-02, PNorm = 167.7306, GNorm = 0.2746, lr_0 = 4.6317e-04
Loss = 2.2575e-02, PNorm = 167.7557, GNorm = 0.3239, lr_0 = 4.6286e-04
Loss = 2.7528e-02, PNorm = 167.7841, GNorm = 0.1932, lr_0 = 4.6254e-04
Loss = 2.3691e-02, PNorm = 167.8085, GNorm = 0.1972, lr_0 = 4.6222e-04
Loss = 2.1548e-02, PNorm = 167.8289, GNorm = 0.9872, lr_0 = 4.6191e-04
Loss = 1.8118e-02, PNorm = 167.8527, GNorm = 0.2731, lr_0 = 4.6159e-04
Loss = 2.1892e-02, PNorm = 167.8789, GNorm = 0.2283, lr_0 = 4.6127e-04
Loss = 2.2335e-02, PNorm = 167.9072, GNorm = 0.1572, lr_0 = 4.6096e-04
Loss = 2.5563e-02, PNorm = 167.9380, GNorm = 0.3762, lr_0 = 4.6064e-04
Loss = 2.8699e-02, PNorm = 167.9629, GNorm = 0.4743, lr_0 = 4.6033e-04
Loss = 2.9302e-02, PNorm = 167.9946, GNorm = 0.4336, lr_0 = 4.6001e-04
Loss = 2.0829e-02, PNorm = 168.0218, GNorm = 0.5183, lr_0 = 4.5970e-04
Loss = 2.1694e-02, PNorm = 168.0454, GNorm = 0.6513, lr_0 = 4.5938e-04
Loss = 2.5541e-02, PNorm = 168.0707, GNorm = 0.5175, lr_0 = 4.5907e-04
Loss = 1.9419e-02, PNorm = 168.0996, GNorm = 0.3723, lr_0 = 4.5875e-04
Loss = 2.5140e-02, PNorm = 168.1274, GNorm = 0.5378, lr_0 = 4.5844e-04
Loss = 2.0996e-02, PNorm = 168.1551, GNorm = 0.3140, lr_0 = 4.5812e-04
Loss = 1.8383e-02, PNorm = 168.1790, GNorm = 0.3114, lr_0 = 4.5781e-04
Loss = 2.4493e-02, PNorm = 168.2026, GNorm = 0.2920, lr_0 = 4.5750e-04
Loss = 1.8730e-02, PNorm = 168.2268, GNorm = 0.3593, lr_0 = 4.5718e-04
Loss = 1.8874e-02, PNorm = 168.2555, GNorm = 0.5769, lr_0 = 4.5687e-04
Loss = 1.8085e-02, PNorm = 168.2812, GNorm = 0.4712, lr_0 = 4.5656e-04
Loss = 2.4804e-02, PNorm = 168.3067, GNorm = 0.4127, lr_0 = 4.5624e-04
Loss = 2.0496e-02, PNorm = 168.3353, GNorm = 0.4990, lr_0 = 4.5593e-04
Loss = 2.5576e-02, PNorm = 168.3666, GNorm = 0.3191, lr_0 = 4.5562e-04
Loss = 2.3064e-02, PNorm = 168.3943, GNorm = 0.4263, lr_0 = 4.5531e-04
Loss = 2.3794e-02, PNorm = 168.4203, GNorm = 0.7570, lr_0 = 4.5499e-04
Loss = 1.7702e-02, PNorm = 168.4420, GNorm = 0.2231, lr_0 = 4.5468e-04
Loss = 1.8939e-02, PNorm = 168.4687, GNorm = 0.3132, lr_0 = 4.5437e-04
Loss = 2.2050e-02, PNorm = 168.4971, GNorm = 0.3178, lr_0 = 4.5406e-04
Loss = 2.0693e-02, PNorm = 168.5242, GNorm = 0.6913, lr_0 = 4.5375e-04
Loss = 3.6605e-02, PNorm = 168.5524, GNorm = 0.5092, lr_0 = 4.5344e-04
Loss = 2.5576e-02, PNorm = 168.5860, GNorm = 0.2849, lr_0 = 4.5313e-04
Loss = 1.9607e-02, PNorm = 168.6109, GNorm = 0.2963, lr_0 = 4.5282e-04
Loss = 2.2154e-02, PNorm = 168.6370, GNorm = 0.7463, lr_0 = 4.5251e-04
Loss = 2.3812e-02, PNorm = 168.6597, GNorm = 0.3061, lr_0 = 4.5220e-04
Loss = 1.8162e-02, PNorm = 168.6856, GNorm = 0.3442, lr_0 = 4.5189e-04
Loss = 2.2591e-02, PNorm = 168.7090, GNorm = 0.2283, lr_0 = 4.5158e-04
Loss = 2.3365e-02, PNorm = 168.7386, GNorm = 1.1182, lr_0 = 4.5127e-04
Loss = 2.1124e-02, PNorm = 168.7693, GNorm = 0.6774, lr_0 = 4.5096e-04
Loss = 1.9219e-02, PNorm = 168.7955, GNorm = 0.2035, lr_0 = 4.5065e-04
Loss = 2.3450e-02, PNorm = 168.8240, GNorm = 0.5026, lr_0 = 4.5034e-04
Loss = 2.4148e-02, PNorm = 168.8506, GNorm = 0.2113, lr_0 = 4.5003e-04
Loss = 2.2827e-02, PNorm = 168.8756, GNorm = 0.8328, lr_0 = 4.4972e-04
Loss = 1.9693e-02, PNorm = 168.8972, GNorm = 0.2148, lr_0 = 4.4942e-04
Loss = 2.9016e-02, PNorm = 168.9224, GNorm = 0.4618, lr_0 = 4.4911e-04
Loss = 2.6502e-02, PNorm = 168.9563, GNorm = 0.1706, lr_0 = 4.4880e-04
Loss = 2.2747e-02, PNorm = 168.9823, GNorm = 0.4873, lr_0 = 4.4849e-04
Loss = 3.3685e-02, PNorm = 169.0135, GNorm = 0.2872, lr_0 = 4.4819e-04
Loss = 2.1183e-02, PNorm = 169.0458, GNorm = 0.5884, lr_0 = 4.4788e-04
Loss = 2.2715e-02, PNorm = 169.0791, GNorm = 0.6478, lr_0 = 4.4757e-04
Loss = 3.3757e-02, PNorm = 169.1150, GNorm = 0.3526, lr_0 = 4.4727e-04
Loss = 2.0590e-02, PNorm = 169.1407, GNorm = 0.6307, lr_0 = 4.4696e-04
Loss = 2.5348e-02, PNorm = 169.1700, GNorm = 0.3110, lr_0 = 4.4665e-04
Loss = 2.3133e-02, PNorm = 169.1981, GNorm = 0.6134, lr_0 = 4.4635e-04
Loss = 2.2414e-02, PNorm = 169.2304, GNorm = 0.1921, lr_0 = 4.4604e-04
Loss = 2.1090e-02, PNorm = 169.2609, GNorm = 0.2302, lr_0 = 4.4574e-04
Loss = 2.1773e-02, PNorm = 169.2912, GNorm = 0.3160, lr_0 = 4.4543e-04
Loss = 2.1713e-02, PNorm = 169.3170, GNorm = 0.2341, lr_0 = 4.4513e-04
Loss = 2.4124e-02, PNorm = 169.3454, GNorm = 0.2680, lr_0 = 4.4482e-04
Loss = 2.1624e-02, PNorm = 169.3757, GNorm = 0.3269, lr_0 = 4.4452e-04
Loss = 1.6884e-02, PNorm = 169.4017, GNorm = 0.2102, lr_0 = 4.4421e-04
Loss = 3.0709e-02, PNorm = 169.4300, GNorm = 0.6129, lr_0 = 4.4391e-04
Loss = 1.9464e-02, PNorm = 169.4613, GNorm = 0.1721, lr_0 = 4.4360e-04
Loss = 2.6842e-02, PNorm = 169.4916, GNorm = 0.4935, lr_0 = 4.4330e-04
Loss = 1.8379e-02, PNorm = 169.5211, GNorm = 0.6908, lr_0 = 4.4299e-04
Loss = 2.8456e-02, PNorm = 169.5469, GNorm = 0.2553, lr_0 = 4.4269e-04
Loss = 2.1714e-02, PNorm = 169.5805, GNorm = 0.3974, lr_0 = 4.4239e-04
Loss = 2.9987e-02, PNorm = 169.6063, GNorm = 0.3787, lr_0 = 4.4209e-04
Loss = 2.2966e-02, PNorm = 169.6359, GNorm = 0.4096, lr_0 = 4.4178e-04
Loss = 2.2753e-02, PNorm = 169.6695, GNorm = 0.3761, lr_0 = 4.4148e-04
Loss = 1.9350e-02, PNorm = 169.7020, GNorm = 0.2845, lr_0 = 4.4118e-04
Loss = 2.2513e-02, PNorm = 169.7333, GNorm = 0.5700, lr_0 = 4.4088e-04
Loss = 2.3136e-02, PNorm = 169.7630, GNorm = 0.4312, lr_0 = 4.4057e-04
Loss = 3.0068e-02, PNorm = 169.7937, GNorm = 0.2630, lr_0 = 4.4027e-04
Loss = 2.9619e-02, PNorm = 169.8230, GNorm = 0.4175, lr_0 = 4.3997e-04
Loss = 2.5230e-02, PNorm = 169.8492, GNorm = 0.5390, lr_0 = 4.3967e-04
Loss = 2.4711e-02, PNorm = 169.8752, GNorm = 0.3484, lr_0 = 4.3937e-04
Validation mae = 0.122510
Epoch 12
Loss = 2.1228e-02, PNorm = 169.8984, GNorm = 0.3385, lr_0 = 4.3907e-04
Loss = 2.2476e-02, PNorm = 169.9213, GNorm = 0.3738, lr_0 = 4.3877e-04
Loss = 1.8325e-02, PNorm = 169.9424, GNorm = 0.3273, lr_0 = 4.3846e-04
Loss = 1.8753e-02, PNorm = 169.9608, GNorm = 0.3910, lr_0 = 4.3816e-04
Loss = 1.9720e-02, PNorm = 169.9815, GNorm = 0.3749, lr_0 = 4.3786e-04
Loss = 1.8051e-02, PNorm = 169.9970, GNorm = 0.2181, lr_0 = 4.3756e-04
Loss = 1.8480e-02, PNorm = 170.0152, GNorm = 0.2074, lr_0 = 4.3726e-04
Loss = 2.2431e-02, PNorm = 170.0390, GNorm = 0.2655, lr_0 = 4.3696e-04
Loss = 1.4356e-02, PNorm = 170.0600, GNorm = 0.2936, lr_0 = 4.3667e-04
Loss = 2.1919e-02, PNorm = 170.0788, GNorm = 0.2305, lr_0 = 4.3637e-04
Loss = 1.7040e-02, PNorm = 170.0968, GNorm = 0.4072, lr_0 = 4.3607e-04
Loss = 2.0918e-02, PNorm = 170.1172, GNorm = 0.7934, lr_0 = 4.3577e-04
Loss = 2.0878e-02, PNorm = 170.1374, GNorm = 0.4492, lr_0 = 4.3547e-04
Loss = 1.9665e-02, PNorm = 170.1586, GNorm = 0.6144, lr_0 = 4.3517e-04
Loss = 2.0345e-02, PNorm = 170.1771, GNorm = 0.3701, lr_0 = 4.3487e-04
Loss = 2.5147e-02, PNorm = 170.1947, GNorm = 0.4434, lr_0 = 4.3458e-04
Loss = 2.1352e-02, PNorm = 170.2151, GNorm = 0.2437, lr_0 = 4.3428e-04
Loss = 1.7137e-02, PNorm = 170.2353, GNorm = 0.2661, lr_0 = 4.3398e-04
Loss = 1.8251e-02, PNorm = 170.2535, GNorm = 0.4126, lr_0 = 4.3368e-04
Loss = 1.7611e-02, PNorm = 170.2712, GNorm = 0.3123, lr_0 = 4.3339e-04
Loss = 3.0975e-02, PNorm = 170.2896, GNorm = 0.2808, lr_0 = 4.3309e-04
Loss = 2.1423e-02, PNorm = 170.3081, GNorm = 0.4573, lr_0 = 4.3279e-04
Loss = 2.1021e-02, PNorm = 170.3296, GNorm = 0.2058, lr_0 = 4.3250e-04
Loss = 1.8400e-02, PNorm = 170.3507, GNorm = 0.7588, lr_0 = 4.3220e-04
Loss = 1.8712e-02, PNorm = 170.3718, GNorm = 0.4289, lr_0 = 4.3190e-04
Loss = 1.8691e-02, PNorm = 170.3930, GNorm = 0.4448, lr_0 = 4.3161e-04
Loss = 2.3402e-02, PNorm = 170.4147, GNorm = 0.3863, lr_0 = 4.3131e-04
Loss = 1.6073e-02, PNorm = 170.4364, GNorm = 0.3894, lr_0 = 4.3102e-04
Loss = 1.7254e-02, PNorm = 170.4523, GNorm = 0.2175, lr_0 = 4.3072e-04
Loss = 1.7381e-02, PNorm = 170.4745, GNorm = 0.1884, lr_0 = 4.3043e-04
Loss = 2.1536e-02, PNorm = 170.4952, GNorm = 0.5391, lr_0 = 4.3013e-04
Loss = 2.1175e-02, PNorm = 170.5213, GNorm = 0.7157, lr_0 = 4.2984e-04
Loss = 2.8543e-02, PNorm = 170.5491, GNorm = 1.6686, lr_0 = 4.2954e-04
Loss = 1.7613e-02, PNorm = 170.5703, GNorm = 0.2919, lr_0 = 4.2925e-04
Loss = 1.4677e-02, PNorm = 170.5931, GNorm = 0.5755, lr_0 = 4.2895e-04
Loss = 1.8536e-02, PNorm = 170.6140, GNorm = 0.6327, lr_0 = 4.2866e-04
Loss = 1.9718e-02, PNorm = 170.6314, GNorm = 0.1563, lr_0 = 4.2837e-04
Loss = 1.8144e-02, PNorm = 170.6486, GNorm = 0.3333, lr_0 = 4.2807e-04
Loss = 1.6872e-02, PNorm = 170.6676, GNorm = 0.4605, lr_0 = 4.2778e-04
Loss = 2.0283e-02, PNorm = 170.6857, GNorm = 0.3276, lr_0 = 4.2749e-04
Loss = 2.3194e-02, PNorm = 170.7059, GNorm = 0.2920, lr_0 = 4.2719e-04
Loss = 1.8336e-02, PNorm = 170.7259, GNorm = 0.2795, lr_0 = 4.2690e-04
Loss = 2.0537e-02, PNorm = 170.7500, GNorm = 0.4764, lr_0 = 4.2661e-04
Loss = 1.7438e-02, PNorm = 170.7746, GNorm = 0.5338, lr_0 = 4.2632e-04
Loss = 1.7578e-02, PNorm = 170.7934, GNorm = 0.2884, lr_0 = 4.2602e-04
Loss = 2.7162e-02, PNorm = 170.8126, GNorm = 0.4711, lr_0 = 4.2573e-04
Loss = 1.6268e-02, PNorm = 170.8321, GNorm = 0.4569, lr_0 = 4.2544e-04
Loss = 1.7318e-02, PNorm = 170.8522, GNorm = 0.4151, lr_0 = 4.2515e-04
Loss = 1.7744e-02, PNorm = 170.8796, GNorm = 0.3427, lr_0 = 4.2486e-04
Loss = 1.7195e-02, PNorm = 170.9006, GNorm = 0.3095, lr_0 = 4.2457e-04
Loss = 2.6425e-02, PNorm = 170.9260, GNorm = 0.6069, lr_0 = 4.2428e-04
Loss = 1.4463e-02, PNorm = 170.9485, GNorm = 0.3658, lr_0 = 4.2399e-04
Loss = 1.7161e-02, PNorm = 170.9692, GNorm = 0.2798, lr_0 = 4.2370e-04
Loss = 1.4048e-02, PNorm = 170.9896, GNorm = 0.1752, lr_0 = 4.2340e-04
Loss = 1.5784e-02, PNorm = 171.0078, GNorm = 0.2873, lr_0 = 4.2311e-04
Loss = 2.3991e-02, PNorm = 171.0278, GNorm = 0.3383, lr_0 = 4.2283e-04
Loss = 1.7095e-02, PNorm = 171.0451, GNorm = 0.4028, lr_0 = 4.2254e-04
Loss = 2.8833e-02, PNorm = 171.0663, GNorm = 0.5079, lr_0 = 4.2225e-04
Loss = 1.8339e-02, PNorm = 171.0865, GNorm = 0.3123, lr_0 = 4.2196e-04
Loss = 1.7563e-02, PNorm = 171.1112, GNorm = 0.2910, lr_0 = 4.2167e-04
Loss = 4.0422e-02, PNorm = 171.1359, GNorm = 1.5370, lr_0 = 4.2138e-04
Loss = 1.6856e-02, PNorm = 171.1578, GNorm = 0.2319, lr_0 = 4.2109e-04
Loss = 2.1466e-02, PNorm = 171.1819, GNorm = 0.2226, lr_0 = 4.2080e-04
Loss = 1.7769e-02, PNorm = 171.2043, GNorm = 0.2118, lr_0 = 4.2051e-04
Loss = 1.7422e-02, PNorm = 171.2263, GNorm = 0.5241, lr_0 = 4.2023e-04
Loss = 1.5003e-02, PNorm = 171.2508, GNorm = 0.2669, lr_0 = 4.1994e-04
Loss = 1.9217e-02, PNorm = 171.2748, GNorm = 0.2853, lr_0 = 4.1965e-04
Loss = 1.7288e-02, PNorm = 171.2977, GNorm = 0.1700, lr_0 = 4.1936e-04
Loss = 1.9308e-02, PNorm = 171.3165, GNorm = 0.2826, lr_0 = 4.1907e-04
Loss = 2.1423e-02, PNorm = 171.3382, GNorm = 0.3005, lr_0 = 4.1879e-04
Loss = 1.8535e-02, PNorm = 171.3647, GNorm = 0.3028, lr_0 = 4.1850e-04
Loss = 1.7993e-02, PNorm = 171.3883, GNorm = 0.1966, lr_0 = 4.1821e-04
Loss = 2.3302e-02, PNorm = 171.4109, GNorm = 0.1896, lr_0 = 4.1793e-04
Loss = 1.7280e-02, PNorm = 171.4350, GNorm = 0.2103, lr_0 = 4.1764e-04
Loss = 1.6505e-02, PNorm = 171.4568, GNorm = 0.5856, lr_0 = 4.1736e-04
Loss = 2.0855e-02, PNorm = 171.4764, GNorm = 0.3676, lr_0 = 4.1707e-04
Loss = 2.0172e-02, PNorm = 171.4987, GNorm = 0.5634, lr_0 = 4.1678e-04
Loss = 1.5623e-02, PNorm = 171.5237, GNorm = 0.2514, lr_0 = 4.1650e-04
Loss = 1.7718e-02, PNorm = 171.5470, GNorm = 0.3625, lr_0 = 4.1621e-04
Loss = 2.0325e-02, PNorm = 171.5694, GNorm = 0.8551, lr_0 = 4.1593e-04
Loss = 1.8565e-02, PNorm = 171.5901, GNorm = 0.2507, lr_0 = 4.1564e-04
Loss = 1.8247e-02, PNorm = 171.6139, GNorm = 0.3228, lr_0 = 4.1536e-04
Loss = 1.5379e-02, PNorm = 171.6355, GNorm = 0.1677, lr_0 = 4.1507e-04
Loss = 1.7190e-02, PNorm = 171.6547, GNorm = 0.1622, lr_0 = 4.1479e-04
Loss = 1.5586e-02, PNorm = 171.6764, GNorm = 0.1690, lr_0 = 4.1450e-04
Loss = 2.0460e-02, PNorm = 171.6993, GNorm = 0.1748, lr_0 = 4.1422e-04
Loss = 2.9978e-02, PNorm = 171.7248, GNorm = 0.2764, lr_0 = 4.1394e-04
Loss = 2.1468e-02, PNorm = 171.7514, GNorm = 0.4661, lr_0 = 4.1365e-04
Loss = 1.9535e-02, PNorm = 171.7730, GNorm = 0.3263, lr_0 = 4.1337e-04
Loss = 2.1584e-02, PNorm = 171.7921, GNorm = 0.2359, lr_0 = 4.1309e-04
Loss = 2.0462e-02, PNorm = 171.8139, GNorm = 0.2032, lr_0 = 4.1280e-04
Loss = 2.0934e-02, PNorm = 171.8362, GNorm = 0.4092, lr_0 = 4.1252e-04
Loss = 2.2452e-02, PNorm = 171.8576, GNorm = 0.3153, lr_0 = 4.1224e-04
Loss = 2.0054e-02, PNorm = 171.8769, GNorm = 0.4157, lr_0 = 4.1196e-04
Loss = 1.6077e-02, PNorm = 171.9011, GNorm = 0.2265, lr_0 = 4.1167e-04
Loss = 1.8975e-02, PNorm = 171.9294, GNorm = 0.2879, lr_0 = 4.1139e-04
Loss = 1.9724e-02, PNorm = 171.9515, GNorm = 0.4240, lr_0 = 4.1111e-04
Loss = 2.0694e-02, PNorm = 171.9716, GNorm = 0.2014, lr_0 = 4.1083e-04
Loss = 1.6246e-02, PNorm = 171.9908, GNorm = 0.2460, lr_0 = 4.1055e-04
Loss = 1.7150e-02, PNorm = 172.0094, GNorm = 0.3797, lr_0 = 4.1027e-04
Loss = 1.5604e-02, PNorm = 172.0273, GNorm = 0.3418, lr_0 = 4.0998e-04
Loss = 2.0275e-02, PNorm = 172.0497, GNorm = 0.1782, lr_0 = 4.0970e-04
Loss = 1.7527e-02, PNorm = 172.0737, GNorm = 0.2567, lr_0 = 4.0942e-04
Loss = 2.0023e-02, PNorm = 172.0957, GNorm = 0.4218, lr_0 = 4.0914e-04
Loss = 2.4275e-02, PNorm = 172.1178, GNorm = 0.4231, lr_0 = 4.0886e-04
Loss = 1.8172e-02, PNorm = 172.1428, GNorm = 0.4531, lr_0 = 4.0858e-04
Loss = 1.9796e-02, PNorm = 172.1655, GNorm = 0.1996, lr_0 = 4.0830e-04
Loss = 1.6669e-02, PNorm = 172.1904, GNorm = 0.4096, lr_0 = 4.0802e-04
Loss = 2.3893e-02, PNorm = 172.2150, GNorm = 0.3067, lr_0 = 4.0774e-04
Loss = 1.6549e-02, PNorm = 172.2404, GNorm = 0.4698, lr_0 = 4.0746e-04
Loss = 1.9462e-02, PNorm = 172.2641, GNorm = 0.2094, lr_0 = 4.0718e-04
Loss = 2.2788e-02, PNorm = 172.2899, GNorm = 0.2525, lr_0 = 4.0691e-04
Loss = 2.3465e-02, PNorm = 172.3176, GNorm = 0.2777, lr_0 = 4.0663e-04
Loss = 2.4180e-02, PNorm = 172.3421, GNorm = 0.5853, lr_0 = 4.0635e-04
Loss = 1.9223e-02, PNorm = 172.3630, GNorm = 0.4312, lr_0 = 4.0607e-04
Loss = 2.0982e-02, PNorm = 172.3883, GNorm = 0.4579, lr_0 = 4.0579e-04
Loss = 2.2803e-02, PNorm = 172.4139, GNorm = 0.2952, lr_0 = 4.0551e-04
Loss = 1.6630e-02, PNorm = 172.4357, GNorm = 0.2806, lr_0 = 4.0524e-04
Loss = 1.6846e-02, PNorm = 172.4604, GNorm = 0.3082, lr_0 = 4.0496e-04
Loss = 1.4774e-02, PNorm = 172.4839, GNorm = 0.3721, lr_0 = 4.0468e-04
Validation mae = 0.122021
Epoch 13
Loss = 1.4276e-02, PNorm = 172.5019, GNorm = 0.2589, lr_0 = 4.0440e-04
Loss = 1.5188e-02, PNorm = 172.5180, GNorm = 0.3023, lr_0 = 4.0413e-04
Loss = 1.5844e-02, PNorm = 172.5345, GNorm = 0.2050, lr_0 = 4.0385e-04
Loss = 1.5407e-02, PNorm = 172.5546, GNorm = 0.1911, lr_0 = 4.0357e-04
Loss = 1.5556e-02, PNorm = 172.5732, GNorm = 0.5289, lr_0 = 4.0330e-04
Loss = 1.5751e-02, PNorm = 172.5881, GNorm = 0.3251, lr_0 = 4.0302e-04
Loss = 1.4220e-02, PNorm = 172.6058, GNorm = 0.3521, lr_0 = 4.0274e-04
Loss = 1.4528e-02, PNorm = 172.6243, GNorm = 0.4207, lr_0 = 4.0247e-04
Loss = 1.9254e-02, PNorm = 172.6434, GNorm = 0.6119, lr_0 = 4.0219e-04
Loss = 1.8086e-02, PNorm = 172.6588, GNorm = 0.2571, lr_0 = 4.0192e-04
Loss = 1.5952e-02, PNorm = 172.6760, GNorm = 0.3654, lr_0 = 4.0164e-04
Loss = 1.8551e-02, PNorm = 172.6971, GNorm = 0.2406, lr_0 = 4.0137e-04
Loss = 1.5143e-02, PNorm = 172.7156, GNorm = 0.5452, lr_0 = 4.0109e-04
Loss = 1.3208e-02, PNorm = 172.7327, GNorm = 0.4261, lr_0 = 4.0082e-04
Loss = 1.3574e-02, PNorm = 172.7477, GNorm = 0.1692, lr_0 = 4.0054e-04
Loss = 1.7591e-02, PNorm = 172.7645, GNorm = 0.2902, lr_0 = 4.0027e-04
Loss = 1.3615e-02, PNorm = 172.7837, GNorm = 0.2102, lr_0 = 3.9999e-04
Loss = 1.4879e-02, PNorm = 172.8026, GNorm = 0.3946, lr_0 = 3.9972e-04
Loss = 1.3769e-02, PNorm = 172.8190, GNorm = 0.3186, lr_0 = 3.9945e-04
Loss = 1.7005e-02, PNorm = 172.8378, GNorm = 0.2148, lr_0 = 3.9917e-04
Loss = 1.6520e-02, PNorm = 172.8545, GNorm = 0.2102, lr_0 = 3.9890e-04
Loss = 1.9324e-02, PNorm = 172.8693, GNorm = 0.3314, lr_0 = 3.9863e-04
Loss = 2.1836e-02, PNorm = 172.8833, GNorm = 0.3337, lr_0 = 3.9835e-04
Loss = 2.2514e-02, PNorm = 172.8978, GNorm = 0.4508, lr_0 = 3.9808e-04
Loss = 1.4876e-02, PNorm = 172.9164, GNorm = 0.4993, lr_0 = 3.9781e-04
Loss = 1.5676e-02, PNorm = 172.9339, GNorm = 0.3689, lr_0 = 3.9753e-04
Loss = 2.1759e-02, PNorm = 172.9511, GNorm = 0.5272, lr_0 = 3.9726e-04
Loss = 1.5403e-02, PNorm = 172.9708, GNorm = 0.2115, lr_0 = 3.9699e-04
Loss = 1.9852e-02, PNorm = 172.9897, GNorm = 0.2597, lr_0 = 3.9672e-04
Loss = 1.1835e-02, PNorm = 173.0086, GNorm = 0.2593, lr_0 = 3.9645e-04
Loss = 1.6101e-02, PNorm = 173.0245, GNorm = 0.5286, lr_0 = 3.9617e-04
Loss = 1.3451e-02, PNorm = 173.0417, GNorm = 0.2818, lr_0 = 3.9590e-04
Loss = 1.3379e-02, PNorm = 173.0591, GNorm = 0.1640, lr_0 = 3.9563e-04
Loss = 2.0542e-02, PNorm = 173.0735, GNorm = 0.5464, lr_0 = 3.9536e-04
Loss = 1.6800e-02, PNorm = 173.0915, GNorm = 0.2715, lr_0 = 3.9509e-04
Loss = 1.7555e-02, PNorm = 173.1089, GNorm = 0.2297, lr_0 = 3.9482e-04
Loss = 1.7110e-02, PNorm = 173.1253, GNorm = 0.2671, lr_0 = 3.9455e-04
Loss = 1.3512e-02, PNorm = 173.1429, GNorm = 0.1476, lr_0 = 3.9428e-04
Loss = 1.5430e-02, PNorm = 173.1593, GNorm = 0.2350, lr_0 = 3.9401e-04
Loss = 1.3819e-02, PNorm = 173.1729, GNorm = 0.3587, lr_0 = 3.9374e-04
Loss = 1.2455e-02, PNorm = 173.1893, GNorm = 0.3087, lr_0 = 3.9347e-04
Loss = 1.4391e-02, PNorm = 173.2077, GNorm = 0.1347, lr_0 = 3.9320e-04
Loss = 1.6814e-02, PNorm = 173.2267, GNorm = 0.3371, lr_0 = 3.9293e-04
Loss = 1.3713e-02, PNorm = 173.2453, GNorm = 0.1942, lr_0 = 3.9266e-04
Loss = 1.4615e-02, PNorm = 173.2617, GNorm = 0.2685, lr_0 = 3.9239e-04
Loss = 1.6806e-02, PNorm = 173.2805, GNorm = 0.4585, lr_0 = 3.9212e-04
Loss = 2.2068e-02, PNorm = 173.2977, GNorm = 0.5476, lr_0 = 3.9185e-04
Loss = 1.4003e-02, PNorm = 173.3146, GNorm = 0.2108, lr_0 = 3.9159e-04
Loss = 1.3769e-02, PNorm = 173.3306, GNorm = 0.1899, lr_0 = 3.9132e-04
Loss = 1.6996e-02, PNorm = 173.3465, GNorm = 0.1845, lr_0 = 3.9105e-04
Loss = 1.4425e-02, PNorm = 173.3612, GNorm = 0.2839, lr_0 = 3.9078e-04
Loss = 1.9318e-02, PNorm = 173.3759, GNorm = 0.3303, lr_0 = 3.9051e-04
Loss = 1.8711e-02, PNorm = 173.3933, GNorm = 0.2211, lr_0 = 3.9025e-04
Loss = 2.1958e-02, PNorm = 173.4123, GNorm = 0.4176, lr_0 = 3.8998e-04
Loss = 1.7231e-02, PNorm = 173.4295, GNorm = 0.5329, lr_0 = 3.8971e-04
Loss = 1.9598e-02, PNorm = 173.4501, GNorm = 0.3646, lr_0 = 3.8945e-04
Loss = 1.8927e-02, PNorm = 173.4717, GNorm = 0.3355, lr_0 = 3.8918e-04
Loss = 1.4086e-02, PNorm = 173.4961, GNorm = 0.2211, lr_0 = 3.8891e-04
Loss = 1.7127e-02, PNorm = 173.5133, GNorm = 0.4011, lr_0 = 3.8865e-04
Loss = 1.5408e-02, PNorm = 173.5325, GNorm = 0.3154, lr_0 = 3.8838e-04
Loss = 2.2755e-02, PNorm = 173.5478, GNorm = 0.5261, lr_0 = 3.8811e-04
Loss = 2.1753e-02, PNorm = 173.5711, GNorm = 1.0444, lr_0 = 3.8785e-04
Loss = 2.1684e-02, PNorm = 173.5933, GNorm = 0.6527, lr_0 = 3.8758e-04
Loss = 1.2923e-02, PNorm = 173.6145, GNorm = 0.1880, lr_0 = 3.8732e-04
Loss = 2.0351e-02, PNorm = 173.6324, GNorm = 0.3063, lr_0 = 3.8705e-04
Loss = 1.2988e-02, PNorm = 173.6493, GNorm = 0.2294, lr_0 = 3.8679e-04
Loss = 1.2396e-02, PNorm = 173.6701, GNorm = 0.1574, lr_0 = 3.8652e-04
Loss = 1.4732e-02, PNorm = 173.6845, GNorm = 0.2977, lr_0 = 3.8626e-04
Loss = 1.5262e-02, PNorm = 173.7022, GNorm = 0.2605, lr_0 = 3.8599e-04
Loss = 1.7246e-02, PNorm = 173.7216, GNorm = 0.2779, lr_0 = 3.8573e-04
Loss = 2.4614e-02, PNorm = 173.7372, GNorm = 0.2252, lr_0 = 3.8546e-04
Loss = 1.6669e-02, PNorm = 173.7590, GNorm = 0.2825, lr_0 = 3.8520e-04
Loss = 1.5042e-02, PNorm = 173.7810, GNorm = 0.3721, lr_0 = 3.8493e-04
Loss = 1.4375e-02, PNorm = 173.7975, GNorm = 0.4609, lr_0 = 3.8467e-04
Loss = 1.8560e-02, PNorm = 173.8171, GNorm = 0.1686, lr_0 = 3.8441e-04
Loss = 1.9591e-02, PNorm = 173.8369, GNorm = 0.2311, lr_0 = 3.8414e-04
Loss = 1.6845e-02, PNorm = 173.8579, GNorm = 0.2336, lr_0 = 3.8388e-04
Loss = 1.4262e-02, PNorm = 173.8800, GNorm = 0.2127, lr_0 = 3.8362e-04
Loss = 1.3681e-02, PNorm = 173.9035, GNorm = 0.2184, lr_0 = 3.8336e-04
Loss = 1.7668e-02, PNorm = 173.9243, GNorm = 0.4352, lr_0 = 3.8309e-04
Loss = 1.3763e-02, PNorm = 173.9410, GNorm = 0.2973, lr_0 = 3.8283e-04
Loss = 1.2221e-02, PNorm = 173.9566, GNorm = 0.2406, lr_0 = 3.8257e-04
Loss = 1.4154e-02, PNorm = 173.9760, GNorm = 0.2061, lr_0 = 3.8231e-04
Loss = 1.2506e-02, PNorm = 173.9947, GNorm = 0.5819, lr_0 = 3.8204e-04
Loss = 1.4118e-02, PNorm = 174.0140, GNorm = 0.3297, lr_0 = 3.8178e-04
Loss = 1.5852e-02, PNorm = 174.0342, GNorm = 0.2799, lr_0 = 3.8152e-04
Loss = 1.4155e-02, PNorm = 174.0538, GNorm = 0.2806, lr_0 = 3.8126e-04
Loss = 1.6476e-02, PNorm = 174.0717, GNorm = 0.3247, lr_0 = 3.8100e-04
Loss = 2.3175e-02, PNorm = 174.0894, GNorm = 0.1266, lr_0 = 3.8074e-04
Loss = 1.5719e-02, PNorm = 174.1051, GNorm = 0.4011, lr_0 = 3.8048e-04
Loss = 1.8009e-02, PNorm = 174.1214, GNorm = 0.4985, lr_0 = 3.8022e-04
Loss = 2.7351e-02, PNorm = 174.1394, GNorm = 0.1704, lr_0 = 3.7995e-04
Loss = 2.8403e-02, PNorm = 174.1550, GNorm = 0.3154, lr_0 = 3.7969e-04
Loss = 1.7864e-02, PNorm = 174.1778, GNorm = 0.3187, lr_0 = 3.7943e-04
Loss = 1.8238e-02, PNorm = 174.1988, GNorm = 0.3711, lr_0 = 3.7917e-04
Loss = 1.6902e-02, PNorm = 174.2186, GNorm = 0.4637, lr_0 = 3.7891e-04
Loss = 1.7326e-02, PNorm = 174.2392, GNorm = 0.2092, lr_0 = 3.7866e-04
Loss = 1.3344e-02, PNorm = 174.2616, GNorm = 0.2793, lr_0 = 3.7840e-04
Loss = 2.1334e-02, PNorm = 174.2808, GNorm = 0.2483, lr_0 = 3.7814e-04
Loss = 1.6431e-02, PNorm = 174.3022, GNorm = 0.4830, lr_0 = 3.7788e-04
Loss = 1.6769e-02, PNorm = 174.3204, GNorm = 0.1809, lr_0 = 3.7762e-04
Loss = 1.7626e-02, PNorm = 174.3433, GNorm = 0.3204, lr_0 = 3.7736e-04
Loss = 1.6377e-02, PNorm = 174.3613, GNorm = 0.2278, lr_0 = 3.7710e-04
Loss = 1.6686e-02, PNorm = 174.3794, GNorm = 0.1471, lr_0 = 3.7684e-04
Loss = 1.5400e-02, PNorm = 174.3979, GNorm = 0.1934, lr_0 = 3.7659e-04
Loss = 1.7465e-02, PNorm = 174.4187, GNorm = 0.1377, lr_0 = 3.7633e-04
Loss = 1.6074e-02, PNorm = 174.4429, GNorm = 0.6239, lr_0 = 3.7607e-04
Loss = 1.1638e-02, PNorm = 174.4664, GNorm = 0.3002, lr_0 = 3.7581e-04
Loss = 2.1071e-02, PNorm = 174.4861, GNorm = 0.2228, lr_0 = 3.7555e-04
Loss = 1.6589e-02, PNorm = 174.5069, GNorm = 0.5119, lr_0 = 3.7530e-04
Loss = 1.4479e-02, PNorm = 174.5235, GNorm = 0.2146, lr_0 = 3.7504e-04
Loss = 1.3814e-02, PNorm = 174.5455, GNorm = 0.3184, lr_0 = 3.7478e-04
Loss = 2.0472e-02, PNorm = 174.5673, GNorm = 0.2855, lr_0 = 3.7453e-04
Loss = 1.6867e-02, PNorm = 174.5905, GNorm = 0.2323, lr_0 = 3.7427e-04
Loss = 1.9763e-02, PNorm = 174.6126, GNorm = 0.2386, lr_0 = 3.7401e-04
Loss = 1.3681e-02, PNorm = 174.6357, GNorm = 0.6394, lr_0 = 3.7376e-04
Loss = 1.5137e-02, PNorm = 174.6565, GNorm = 0.1766, lr_0 = 3.7350e-04
Loss = 1.4275e-02, PNorm = 174.6755, GNorm = 0.1760, lr_0 = 3.7325e-04
Loss = 1.5032e-02, PNorm = 174.6962, GNorm = 0.3551, lr_0 = 3.7299e-04
Loss = 1.5487e-02, PNorm = 174.7160, GNorm = 0.2161, lr_0 = 3.7273e-04
Validation mae = 0.122317
Epoch 14
Loss = 1.3152e-02, PNorm = 174.7365, GNorm = 0.1625, lr_0 = 3.7248e-04
Loss = 1.6285e-02, PNorm = 174.7495, GNorm = 0.2117, lr_0 = 3.7222e-04
Loss = 1.2776e-02, PNorm = 174.7606, GNorm = 0.3380, lr_0 = 3.7197e-04
Loss = 1.1336e-02, PNorm = 174.7721, GNorm = 0.1841, lr_0 = 3.7171e-04
Loss = 1.2750e-02, PNorm = 174.7841, GNorm = 0.1357, lr_0 = 3.7146e-04
Loss = 1.2180e-02, PNorm = 174.7990, GNorm = 0.1805, lr_0 = 3.7120e-04
Loss = 1.5549e-02, PNorm = 174.8146, GNorm = 0.2575, lr_0 = 3.7095e-04
Loss = 1.2453e-02, PNorm = 174.8330, GNorm = 0.1462, lr_0 = 3.7070e-04
Loss = 1.1513e-02, PNorm = 174.8461, GNorm = 0.3429, lr_0 = 3.7044e-04
Loss = 1.6203e-02, PNorm = 174.8577, GNorm = 0.3028, lr_0 = 3.7019e-04
Loss = 1.1367e-02, PNorm = 174.8715, GNorm = 0.1813, lr_0 = 3.6993e-04
Loss = 1.1583e-02, PNorm = 174.8877, GNorm = 0.3006, lr_0 = 3.6968e-04
Loss = 1.5396e-02, PNorm = 174.9051, GNorm = 0.5477, lr_0 = 3.6943e-04
Loss = 1.6045e-02, PNorm = 174.9189, GNorm = 0.2854, lr_0 = 3.6917e-04
Loss = 1.2549e-02, PNorm = 174.9334, GNorm = 0.1984, lr_0 = 3.6892e-04
Loss = 1.3033e-02, PNorm = 174.9492, GNorm = 0.2542, lr_0 = 3.6867e-04
Loss = 1.9049e-02, PNorm = 174.9646, GNorm = 0.1994, lr_0 = 3.6842e-04
Loss = 1.7569e-02, PNorm = 174.9799, GNorm = 1.7796, lr_0 = 3.6816e-04
Loss = 1.5290e-02, PNorm = 174.9933, GNorm = 0.5837, lr_0 = 3.6791e-04
Loss = 1.1171e-02, PNorm = 175.0103, GNorm = 0.3596, lr_0 = 3.6766e-04
Loss = 1.4952e-02, PNorm = 175.0248, GNorm = 0.1690, lr_0 = 3.6741e-04
Loss = 1.1156e-02, PNorm = 175.0409, GNorm = 0.2144, lr_0 = 3.6716e-04
Loss = 1.8072e-02, PNorm = 175.0570, GNorm = 0.2661, lr_0 = 3.6690e-04
Loss = 1.4461e-02, PNorm = 175.0723, GNorm = 0.1804, lr_0 = 3.6665e-04
Loss = 1.2579e-02, PNorm = 175.0851, GNorm = 0.2619, lr_0 = 3.6640e-04
Loss = 1.2596e-02, PNorm = 175.1008, GNorm = 0.2575, lr_0 = 3.6615e-04
Loss = 1.5246e-02, PNorm = 175.1139, GNorm = 0.2384, lr_0 = 3.6590e-04
Loss = 1.3032e-02, PNorm = 175.1281, GNorm = 0.2046, lr_0 = 3.6565e-04
Loss = 1.1044e-02, PNorm = 175.1441, GNorm = 0.2058, lr_0 = 3.6540e-04
Loss = 1.8613e-02, PNorm = 175.1605, GNorm = 0.3828, lr_0 = 3.6515e-04
Loss = 1.2182e-02, PNorm = 175.1793, GNorm = 0.1881, lr_0 = 3.6490e-04
Loss = 1.2039e-02, PNorm = 175.1953, GNorm = 0.4999, lr_0 = 3.6465e-04
Loss = 1.5127e-02, PNorm = 175.2087, GNorm = 0.2668, lr_0 = 3.6440e-04
Loss = 1.1460e-02, PNorm = 175.2271, GNorm = 0.1653, lr_0 = 3.6415e-04
Loss = 1.2134e-02, PNorm = 175.2407, GNorm = 0.2958, lr_0 = 3.6390e-04
Loss = 1.6438e-02, PNorm = 175.2530, GNorm = 0.5459, lr_0 = 3.6365e-04
Loss = 1.5258e-02, PNorm = 175.2678, GNorm = 0.2977, lr_0 = 3.6340e-04
Loss = 1.9197e-02, PNorm = 175.2828, GNorm = 0.4765, lr_0 = 3.6315e-04
Loss = 1.3208e-02, PNorm = 175.2994, GNorm = 0.1329, lr_0 = 3.6290e-04
Loss = 1.3263e-02, PNorm = 175.3160, GNorm = 0.1884, lr_0 = 3.6266e-04
Loss = 1.4960e-02, PNorm = 175.3342, GNorm = 0.1957, lr_0 = 3.6241e-04
Loss = 1.1643e-02, PNorm = 175.3500, GNorm = 0.1211, lr_0 = 3.6216e-04
Loss = 1.6718e-02, PNorm = 175.3640, GNorm = 0.4437, lr_0 = 3.6191e-04
Loss = 1.1638e-02, PNorm = 175.3813, GNorm = 0.2806, lr_0 = 3.6166e-04
Loss = 1.2781e-02, PNorm = 175.3980, GNorm = 0.2274, lr_0 = 3.6141e-04
Loss = 1.3298e-02, PNorm = 175.4115, GNorm = 0.1717, lr_0 = 3.6117e-04
Loss = 1.6814e-02, PNorm = 175.4238, GNorm = 0.3198, lr_0 = 3.6092e-04
Loss = 1.2070e-02, PNorm = 175.4379, GNorm = 0.2360, lr_0 = 3.6067e-04
Loss = 1.4059e-02, PNorm = 175.4488, GNorm = 0.2312, lr_0 = 3.6043e-04
Loss = 1.3132e-02, PNorm = 175.4615, GNorm = 0.2892, lr_0 = 3.6018e-04
Loss = 1.1547e-02, PNorm = 175.4739, GNorm = 0.3454, lr_0 = 3.5993e-04
Loss = 2.1489e-02, PNorm = 175.4876, GNorm = 0.2716, lr_0 = 3.5969e-04
Loss = 1.9290e-02, PNorm = 175.5052, GNorm = 0.2188, lr_0 = 3.5944e-04
Loss = 1.8719e-02, PNorm = 175.5202, GNorm = 0.3768, lr_0 = 3.5919e-04
Loss = 1.0639e-02, PNorm = 175.5337, GNorm = 0.1379, lr_0 = 3.5895e-04
Loss = 1.3667e-02, PNorm = 175.5490, GNorm = 0.2875, lr_0 = 3.5870e-04
Loss = 1.3585e-02, PNorm = 175.5642, GNorm = 0.2381, lr_0 = 3.5845e-04
Loss = 1.0558e-02, PNorm = 175.5829, GNorm = 0.1318, lr_0 = 3.5821e-04
Loss = 1.2475e-02, PNorm = 175.6007, GNorm = 0.3200, lr_0 = 3.5796e-04
Loss = 1.4732e-02, PNorm = 175.6167, GNorm = 0.5219, lr_0 = 3.5772e-04
Loss = 1.1159e-02, PNorm = 175.6325, GNorm = 0.2601, lr_0 = 3.5747e-04
Loss = 1.0547e-02, PNorm = 175.6497, GNorm = 0.2052, lr_0 = 3.5723e-04
Loss = 1.3304e-02, PNorm = 175.6650, GNorm = 0.2825, lr_0 = 3.5698e-04
Loss = 1.2264e-02, PNorm = 175.6792, GNorm = 0.1747, lr_0 = 3.5674e-04
Loss = 9.8430e-03, PNorm = 175.6954, GNorm = 0.1394, lr_0 = 3.5650e-04
Loss = 1.7123e-02, PNorm = 175.7110, GNorm = 0.2748, lr_0 = 3.5625e-04
Loss = 1.2192e-02, PNorm = 175.7270, GNorm = 0.3781, lr_0 = 3.5601e-04
Loss = 1.3217e-02, PNorm = 175.7419, GNorm = 0.3539, lr_0 = 3.5576e-04
Loss = 1.1688e-02, PNorm = 175.7566, GNorm = 0.2073, lr_0 = 3.5552e-04
Loss = 1.1173e-02, PNorm = 175.7726, GNorm = 0.2341, lr_0 = 3.5528e-04
Loss = 2.0699e-02, PNorm = 175.7868, GNorm = 0.3178, lr_0 = 3.5503e-04
Loss = 2.4772e-02, PNorm = 175.8046, GNorm = 0.8282, lr_0 = 3.5479e-04
Loss = 1.5235e-02, PNorm = 175.8226, GNorm = 0.3756, lr_0 = 3.5455e-04
Loss = 1.2550e-02, PNorm = 175.8384, GNorm = 0.1700, lr_0 = 3.5430e-04
Loss = 1.9140e-02, PNorm = 175.8540, GNorm = 0.4197, lr_0 = 3.5406e-04
Loss = 1.2307e-02, PNorm = 175.8715, GNorm = 0.2618, lr_0 = 3.5382e-04
Loss = 1.2433e-02, PNorm = 175.8850, GNorm = 0.3796, lr_0 = 3.5358e-04
Loss = 1.2547e-02, PNorm = 175.9006, GNorm = 0.2301, lr_0 = 3.5333e-04
Loss = 1.3154e-02, PNorm = 175.9166, GNorm = 0.3763, lr_0 = 3.5309e-04
Loss = 1.5728e-02, PNorm = 175.9296, GNorm = 0.2970, lr_0 = 3.5285e-04
Loss = 1.0731e-02, PNorm = 175.9431, GNorm = 0.3451, lr_0 = 3.5261e-04
Loss = 1.1191e-02, PNorm = 175.9614, GNorm = 0.1547, lr_0 = 3.5237e-04
Loss = 1.5988e-02, PNorm = 175.9780, GNorm = 0.2251, lr_0 = 3.5212e-04
Loss = 1.0032e-02, PNorm = 175.9931, GNorm = 0.1602, lr_0 = 3.5188e-04
Loss = 2.0096e-02, PNorm = 176.0128, GNorm = 0.3830, lr_0 = 3.5164e-04
Loss = 2.1259e-02, PNorm = 176.0313, GNorm = 0.1729, lr_0 = 3.5140e-04
Loss = 1.3910e-02, PNorm = 176.0492, GNorm = 0.2746, lr_0 = 3.5116e-04
Loss = 1.4476e-02, PNorm = 176.0623, GNorm = 0.3431, lr_0 = 3.5092e-04
Loss = 1.2938e-02, PNorm = 176.0753, GNorm = 0.1565, lr_0 = 3.5068e-04
Loss = 1.7566e-02, PNorm = 176.0916, GNorm = 0.1772, lr_0 = 3.5044e-04
Loss = 1.1712e-02, PNorm = 176.1083, GNorm = 0.3895, lr_0 = 3.5020e-04
Loss = 1.2536e-02, PNorm = 176.1262, GNorm = 0.2963, lr_0 = 3.4996e-04
Loss = 1.0958e-02, PNorm = 176.1453, GNorm = 0.3977, lr_0 = 3.4972e-04
Loss = 1.1019e-02, PNorm = 176.1593, GNorm = 0.1742, lr_0 = 3.4948e-04
Loss = 1.2024e-02, PNorm = 176.1747, GNorm = 0.2838, lr_0 = 3.4924e-04
Loss = 1.3785e-02, PNorm = 176.1926, GNorm = 0.1969, lr_0 = 3.4900e-04
Loss = 1.2094e-02, PNorm = 176.2093, GNorm = 0.1946, lr_0 = 3.4876e-04
Loss = 1.5447e-02, PNorm = 176.2246, GNorm = 0.1820, lr_0 = 3.4852e-04
Loss = 1.1825e-02, PNorm = 176.2416, GNorm = 0.2061, lr_0 = 3.4828e-04
Loss = 1.5829e-02, PNorm = 176.2602, GNorm = 0.1540, lr_0 = 3.4805e-04
Loss = 1.0507e-02, PNorm = 176.2769, GNorm = 0.2000, lr_0 = 3.4781e-04
Loss = 2.6734e-02, PNorm = 176.2905, GNorm = 0.1024, lr_0 = 3.4757e-04
Loss = 1.4204e-02, PNorm = 176.3094, GNorm = 0.3317, lr_0 = 3.4733e-04
Loss = 1.7379e-02, PNorm = 176.3278, GNorm = 0.3100, lr_0 = 3.4709e-04
Loss = 1.3006e-02, PNorm = 176.3468, GNorm = 0.6520, lr_0 = 3.4686e-04
Loss = 1.2906e-02, PNorm = 176.3647, GNorm = 0.3538, lr_0 = 3.4662e-04
Loss = 2.1103e-02, PNorm = 176.3753, GNorm = 0.5797, lr_0 = 3.4638e-04
Loss = 1.6066e-02, PNorm = 176.3927, GNorm = 0.1772, lr_0 = 3.4614e-04
Loss = 1.8212e-02, PNorm = 176.4103, GNorm = 0.2252, lr_0 = 3.4591e-04
Loss = 1.0208e-02, PNorm = 176.4279, GNorm = 0.2256, lr_0 = 3.4567e-04
Loss = 1.5273e-02, PNorm = 176.4454, GNorm = 0.1815, lr_0 = 3.4543e-04
Loss = 1.3245e-02, PNorm = 176.4629, GNorm = 0.2757, lr_0 = 3.4520e-04
Loss = 1.4508e-02, PNorm = 176.4825, GNorm = 0.5831, lr_0 = 3.4496e-04
Loss = 1.2770e-02, PNorm = 176.5025, GNorm = 0.2097, lr_0 = 3.4472e-04
Loss = 1.8176e-02, PNorm = 176.5186, GNorm = 0.3347, lr_0 = 3.4449e-04
Loss = 1.0714e-02, PNorm = 176.5345, GNorm = 0.3137, lr_0 = 3.4425e-04
Loss = 1.6796e-02, PNorm = 176.5486, GNorm = 0.2154, lr_0 = 3.4402e-04
Loss = 1.7687e-02, PNorm = 176.5649, GNorm = 0.4117, lr_0 = 3.4378e-04
Loss = 1.1859e-02, PNorm = 176.5833, GNorm = 0.2317, lr_0 = 3.4354e-04
Loss = 1.3672e-02, PNorm = 176.6006, GNorm = 0.1613, lr_0 = 3.4331e-04
Validation mae = 0.122032
Epoch 15
Loss = 1.1522e-02, PNorm = 176.6170, GNorm = 0.3778, lr_0 = 3.4307e-04
Loss = 1.0435e-02, PNorm = 176.6326, GNorm = 0.3053, lr_0 = 3.4284e-04
Loss = 1.2126e-02, PNorm = 176.6440, GNorm = 0.5327, lr_0 = 3.4260e-04
Loss = 1.2562e-02, PNorm = 176.6549, GNorm = 0.2260, lr_0 = 3.4237e-04
Loss = 1.2700e-02, PNorm = 176.6649, GNorm = 0.4619, lr_0 = 3.4213e-04
Loss = 9.2493e-03, PNorm = 176.6776, GNorm = 0.2323, lr_0 = 3.4190e-04
Loss = 1.0976e-02, PNorm = 176.6915, GNorm = 0.2138, lr_0 = 3.4167e-04
Loss = 1.0924e-02, PNorm = 176.7057, GNorm = 0.4267, lr_0 = 3.4143e-04
Loss = 1.5137e-02, PNorm = 176.7163, GNorm = 0.2284, lr_0 = 3.4120e-04
Loss = 1.4348e-02, PNorm = 176.7267, GNorm = 0.3232, lr_0 = 3.4096e-04
Loss = 1.1998e-02, PNorm = 176.7382, GNorm = 0.3070, lr_0 = 3.4073e-04
Loss = 1.5224e-02, PNorm = 176.7520, GNorm = 0.4171, lr_0 = 3.4050e-04
Loss = 1.2210e-02, PNorm = 176.7646, GNorm = 0.2551, lr_0 = 3.4026e-04
Loss = 1.2741e-02, PNorm = 176.7771, GNorm = 0.2756, lr_0 = 3.4003e-04
Loss = 1.2453e-02, PNorm = 176.7824, GNorm = 0.4906, lr_0 = 3.3980e-04
Loss = 1.3303e-02, PNorm = 176.7991, GNorm = 0.2504, lr_0 = 3.3956e-04
Loss = 1.1370e-02, PNorm = 176.8140, GNorm = 0.2532, lr_0 = 3.3933e-04
Loss = 1.3262e-02, PNorm = 176.8281, GNorm = 0.1560, lr_0 = 3.3910e-04
Loss = 1.1475e-02, PNorm = 176.8387, GNorm = 0.1956, lr_0 = 3.3887e-04
Loss = 1.0373e-02, PNorm = 176.8501, GNorm = 0.1682, lr_0 = 3.3864e-04
Loss = 1.3206e-02, PNorm = 176.8667, GNorm = 0.5901, lr_0 = 3.3840e-04
Loss = 1.2697e-02, PNorm = 176.8793, GNorm = 0.3358, lr_0 = 3.3817e-04
Loss = 9.1840e-03, PNorm = 176.8921, GNorm = 0.2722, lr_0 = 3.3794e-04
Loss = 1.2111e-02, PNorm = 176.9039, GNorm = 0.3335, lr_0 = 3.3771e-04
Loss = 1.6009e-02, PNorm = 176.9165, GNorm = 0.2577, lr_0 = 3.3748e-04
Loss = 9.9957e-03, PNorm = 176.9315, GNorm = 0.3132, lr_0 = 3.3725e-04
Loss = 1.0686e-02, PNorm = 176.9417, GNorm = 0.1943, lr_0 = 3.3701e-04
Loss = 1.5386e-02, PNorm = 176.9519, GNorm = 0.3459, lr_0 = 3.3678e-04
Loss = 1.1199e-02, PNorm = 176.9616, GNorm = 0.3365, lr_0 = 3.3655e-04
Loss = 9.8392e-03, PNorm = 176.9703, GNorm = 0.2153, lr_0 = 3.3632e-04
Loss = 1.1283e-02, PNorm = 176.9835, GNorm = 0.1206, lr_0 = 3.3609e-04
Loss = 8.3363e-03, PNorm = 176.9963, GNorm = 0.4526, lr_0 = 3.3586e-04
Loss = 1.0951e-02, PNorm = 177.0082, GNorm = 0.5530, lr_0 = 3.3563e-04
Loss = 9.5300e-03, PNorm = 177.0227, GNorm = 0.4340, lr_0 = 3.3540e-04
Loss = 1.0102e-02, PNorm = 177.0351, GNorm = 0.1521, lr_0 = 3.3517e-04
Loss = 1.1721e-02, PNorm = 177.0462, GNorm = 0.4615, lr_0 = 3.3494e-04
Loss = 1.9234e-02, PNorm = 177.0589, GNorm = 0.4878, lr_0 = 3.3471e-04
Loss = 1.5661e-02, PNorm = 177.0715, GNorm = 0.1755, lr_0 = 3.3448e-04
Loss = 9.4492e-03, PNorm = 177.0849, GNorm = 0.2320, lr_0 = 3.3425e-04
Loss = 1.0592e-02, PNorm = 177.0997, GNorm = 0.2930, lr_0 = 3.3403e-04
Loss = 8.7286e-03, PNorm = 177.1138, GNorm = 0.4678, lr_0 = 3.3380e-04
Loss = 1.5585e-02, PNorm = 177.1252, GNorm = 0.4053, lr_0 = 3.3357e-04
Loss = 1.1893e-02, PNorm = 177.1378, GNorm = 0.2632, lr_0 = 3.3334e-04
Loss = 1.2848e-02, PNorm = 177.1476, GNorm = 0.1772, lr_0 = 3.3311e-04
Loss = 1.2403e-02, PNorm = 177.1579, GNorm = 0.2250, lr_0 = 3.3288e-04
Loss = 1.3101e-02, PNorm = 177.1700, GNorm = 0.3444, lr_0 = 3.3265e-04
Loss = 1.6177e-02, PNorm = 177.1807, GNorm = 0.3533, lr_0 = 3.3243e-04
Loss = 2.1855e-02, PNorm = 177.1943, GNorm = 0.1204, lr_0 = 3.3220e-04
Loss = 1.3409e-02, PNorm = 177.2075, GNorm = 0.3334, lr_0 = 3.3197e-04
Loss = 1.1269e-02, PNorm = 177.2178, GNorm = 0.1675, lr_0 = 3.3174e-04
Loss = 1.2056e-02, PNorm = 177.2285, GNorm = 0.2386, lr_0 = 3.3152e-04
Loss = 1.0052e-02, PNorm = 177.2417, GNorm = 0.3619, lr_0 = 3.3129e-04
Loss = 9.9864e-03, PNorm = 177.2539, GNorm = 0.2159, lr_0 = 3.3106e-04
Loss = 1.2692e-02, PNorm = 177.2676, GNorm = 0.3119, lr_0 = 3.3084e-04
Loss = 1.1409e-02, PNorm = 177.2797, GNorm = 0.4130, lr_0 = 3.3061e-04
Loss = 9.7168e-03, PNorm = 177.2920, GNorm = 0.2641, lr_0 = 3.3038e-04
Loss = 1.1494e-02, PNorm = 177.3078, GNorm = 0.2217, lr_0 = 3.3016e-04
Loss = 1.5731e-02, PNorm = 177.3169, GNorm = 0.1455, lr_0 = 3.2993e-04
Loss = 1.8671e-02, PNorm = 177.3281, GNorm = 0.5222, lr_0 = 3.2970e-04
Loss = 1.1694e-02, PNorm = 177.3483, GNorm = 0.4038, lr_0 = 3.2948e-04
Loss = 9.7459e-03, PNorm = 177.3611, GNorm = 0.1722, lr_0 = 3.2925e-04
Loss = 1.0784e-02, PNorm = 177.3754, GNorm = 0.1920, lr_0 = 3.2903e-04
Loss = 1.5087e-02, PNorm = 177.3895, GNorm = 0.3203, lr_0 = 3.2880e-04
Loss = 1.9705e-02, PNorm = 177.4083, GNorm = 0.4699, lr_0 = 3.2858e-04
Loss = 1.4950e-02, PNorm = 177.4279, GNorm = 0.2219, lr_0 = 3.2835e-04
Loss = 1.2053e-02, PNorm = 177.4417, GNorm = 0.1860, lr_0 = 3.2813e-04
Loss = 1.2731e-02, PNorm = 177.4551, GNorm = 0.3189, lr_0 = 3.2790e-04
Loss = 1.2848e-02, PNorm = 177.4686, GNorm = 0.3159, lr_0 = 3.2768e-04
Loss = 1.5205e-02, PNorm = 177.4791, GNorm = 0.2980, lr_0 = 3.2745e-04
Loss = 1.0117e-02, PNorm = 177.4940, GNorm = 0.1366, lr_0 = 3.2723e-04
Loss = 1.5347e-02, PNorm = 177.5085, GNorm = 0.2974, lr_0 = 3.2700e-04
Loss = 1.1642e-02, PNorm = 177.5211, GNorm = 0.3890, lr_0 = 3.2678e-04
Loss = 9.6229e-03, PNorm = 177.5359, GNorm = 0.2816, lr_0 = 3.2656e-04
Loss = 1.2116e-02, PNorm = 177.5474, GNorm = 0.1521, lr_0 = 3.2633e-04
Loss = 1.2836e-02, PNorm = 177.5610, GNorm = 0.1347, lr_0 = 3.2611e-04
Loss = 1.4398e-02, PNorm = 177.5791, GNorm = 0.2006, lr_0 = 3.2589e-04
Loss = 1.1273e-02, PNorm = 177.5934, GNorm = 0.6426, lr_0 = 3.2566e-04
Loss = 1.0159e-02, PNorm = 177.6073, GNorm = 0.1473, lr_0 = 3.2544e-04
Loss = 9.5018e-03, PNorm = 177.6180, GNorm = 0.1852, lr_0 = 3.2522e-04
Loss = 1.3836e-02, PNorm = 177.6286, GNorm = 0.1198, lr_0 = 3.2499e-04
Loss = 1.1392e-02, PNorm = 177.6418, GNorm = 0.1806, lr_0 = 3.2477e-04
Loss = 1.1683e-02, PNorm = 177.6562, GNorm = 0.3094, lr_0 = 3.2455e-04
Loss = 1.1013e-02, PNorm = 177.6686, GNorm = 0.3013, lr_0 = 3.2433e-04
Loss = 1.3053e-02, PNorm = 177.6755, GNorm = 0.1968, lr_0 = 3.2410e-04
Loss = 8.5597e-03, PNorm = 177.6858, GNorm = 0.1765, lr_0 = 3.2388e-04
Loss = 1.4141e-02, PNorm = 177.6967, GNorm = 0.3603, lr_0 = 3.2366e-04
Loss = 1.2113e-02, PNorm = 177.7132, GNorm = 0.2001, lr_0 = 3.2344e-04
Loss = 1.2034e-02, PNorm = 177.7282, GNorm = 0.2550, lr_0 = 3.2322e-04
Loss = 1.4694e-02, PNorm = 177.7413, GNorm = 0.9295, lr_0 = 3.2300e-04
Loss = 1.3776e-02, PNorm = 177.7571, GNorm = 0.2117, lr_0 = 3.2277e-04
Loss = 1.0870e-02, PNorm = 177.7729, GNorm = 0.4038, lr_0 = 3.2255e-04
Loss = 8.7812e-03, PNorm = 177.7837, GNorm = 0.1751, lr_0 = 3.2233e-04
Loss = 1.3809e-02, PNorm = 177.7988, GNorm = 0.3432, lr_0 = 3.2211e-04
Loss = 9.9762e-03, PNorm = 177.8123, GNorm = 0.2740, lr_0 = 3.2189e-04
Loss = 1.7684e-02, PNorm = 177.8267, GNorm = 0.1667, lr_0 = 3.2167e-04
Loss = 1.1484e-02, PNorm = 177.8407, GNorm = 0.1704, lr_0 = 3.2145e-04
Loss = 9.2299e-03, PNorm = 177.8521, GNorm = 0.2884, lr_0 = 3.2123e-04
Loss = 1.0439e-02, PNorm = 177.8640, GNorm = 0.3235, lr_0 = 3.2101e-04
Loss = 1.0414e-02, PNorm = 177.8757, GNorm = 0.3719, lr_0 = 3.2079e-04
Loss = 9.2842e-03, PNorm = 177.8904, GNorm = 0.2942, lr_0 = 3.2057e-04
Loss = 1.4339e-02, PNorm = 177.9057, GNorm = 0.2217, lr_0 = 3.2035e-04
Loss = 1.3141e-02, PNorm = 177.9201, GNorm = 0.2316, lr_0 = 3.2013e-04
Loss = 1.3462e-02, PNorm = 177.9336, GNorm = 0.1818, lr_0 = 3.1991e-04
Loss = 2.9841e-02, PNorm = 177.9484, GNorm = 0.3575, lr_0 = 3.1969e-04
Loss = 1.0541e-02, PNorm = 177.9617, GNorm = 0.5593, lr_0 = 3.1947e-04
Loss = 1.1909e-02, PNorm = 177.9767, GNorm = 0.2805, lr_0 = 3.1925e-04
Loss = 1.2424e-02, PNorm = 177.9915, GNorm = 0.5450, lr_0 = 3.1904e-04
Loss = 1.2568e-02, PNorm = 178.0072, GNorm = 0.1203, lr_0 = 3.1882e-04
Loss = 1.2671e-02, PNorm = 178.0162, GNorm = 0.2124, lr_0 = 3.1860e-04
Loss = 1.1062e-02, PNorm = 178.0270, GNorm = 0.2090, lr_0 = 3.1838e-04
Loss = 1.4364e-02, PNorm = 178.0399, GNorm = 0.1587, lr_0 = 3.1816e-04
Loss = 1.4854e-02, PNorm = 178.0545, GNorm = 0.2361, lr_0 = 3.1794e-04
Loss = 1.1524e-02, PNorm = 178.0689, GNorm = 0.3663, lr_0 = 3.1773e-04
Loss = 1.8054e-02, PNorm = 178.0804, GNorm = 0.2563, lr_0 = 3.1751e-04
Loss = 1.2460e-02, PNorm = 178.0945, GNorm = 0.2576, lr_0 = 3.1729e-04
Loss = 1.4080e-02, PNorm = 178.1087, GNorm = 0.4002, lr_0 = 3.1707e-04
Loss = 1.1476e-02, PNorm = 178.1242, GNorm = 0.2311, lr_0 = 3.1686e-04
Loss = 1.3725e-02, PNorm = 178.1383, GNorm = 0.2113, lr_0 = 3.1664e-04
Loss = 1.3051e-02, PNorm = 178.1553, GNorm = 0.1776, lr_0 = 3.1642e-04
Loss = 1.0666e-02, PNorm = 178.1707, GNorm = 0.2918, lr_0 = 3.1621e-04
Validation mae = 0.121623
Epoch 16
Loss = 1.1640e-02, PNorm = 178.1829, GNorm = 0.2139, lr_0 = 3.1599e-04
Loss = 1.0244e-02, PNorm = 178.1951, GNorm = 0.2155, lr_0 = 3.1577e-04
Loss = 1.0352e-02, PNorm = 178.2039, GNorm = 0.1343, lr_0 = 3.1556e-04
Loss = 1.3975e-02, PNorm = 178.2117, GNorm = 0.2843, lr_0 = 3.1534e-04
Loss = 1.1365e-02, PNorm = 178.2220, GNorm = 0.4063, lr_0 = 3.1512e-04
Loss = 1.0736e-02, PNorm = 178.2333, GNorm = 0.3580, lr_0 = 3.1491e-04
Loss = 1.3354e-02, PNorm = 178.2429, GNorm = 0.5190, lr_0 = 3.1469e-04
Loss = 1.3444e-02, PNorm = 178.2534, GNorm = 0.1531, lr_0 = 3.1448e-04
Loss = 9.7322e-03, PNorm = 178.2647, GNorm = 0.1708, lr_0 = 3.1426e-04
Loss = 9.7419e-03, PNorm = 178.2767, GNorm = 0.1882, lr_0 = 3.1405e-04
Loss = 1.2459e-02, PNorm = 178.2875, GNorm = 0.5981, lr_0 = 3.1383e-04
Loss = 1.3476e-02, PNorm = 178.2985, GNorm = 0.2182, lr_0 = 3.1362e-04
Loss = 1.1574e-02, PNorm = 178.3085, GNorm = 0.2400, lr_0 = 3.1340e-04
Loss = 1.0372e-02, PNorm = 178.3207, GNorm = 0.1999, lr_0 = 3.1319e-04
Loss = 1.1881e-02, PNorm = 178.3307, GNorm = 0.2776, lr_0 = 3.1297e-04
Loss = 1.0999e-02, PNorm = 178.3406, GNorm = 0.4961, lr_0 = 3.1276e-04
Loss = 7.9004e-03, PNorm = 178.3509, GNorm = 0.1752, lr_0 = 3.1254e-04
Loss = 8.7710e-03, PNorm = 178.3595, GNorm = 0.2569, lr_0 = 3.1233e-04
Loss = 1.0857e-02, PNorm = 178.3697, GNorm = 0.1509, lr_0 = 3.1212e-04
Loss = 9.7553e-03, PNorm = 178.3820, GNorm = 0.1732, lr_0 = 3.1190e-04
Loss = 1.0049e-02, PNorm = 178.3944, GNorm = 0.2308, lr_0 = 3.1169e-04
Loss = 7.5016e-03, PNorm = 178.4054, GNorm = 0.2459, lr_0 = 3.1147e-04
Loss = 9.7788e-03, PNorm = 178.4127, GNorm = 0.4175, lr_0 = 3.1126e-04
Loss = 1.0043e-02, PNorm = 178.4214, GNorm = 0.1919, lr_0 = 3.1105e-04
Loss = 1.2854e-02, PNorm = 178.4330, GNorm = 0.2861, lr_0 = 3.1083e-04
Loss = 1.1842e-02, PNorm = 178.4414, GNorm = 0.2353, lr_0 = 3.1062e-04
Loss = 1.2371e-02, PNorm = 178.4496, GNorm = 0.1800, lr_0 = 3.1041e-04
Loss = 1.1985e-02, PNorm = 178.4594, GNorm = 0.1090, lr_0 = 3.1020e-04
Loss = 1.1917e-02, PNorm = 178.4679, GNorm = 0.3441, lr_0 = 3.0998e-04
Loss = 1.2879e-02, PNorm = 178.4791, GNorm = 0.4920, lr_0 = 3.0977e-04
Loss = 1.1121e-02, PNorm = 178.4907, GNorm = 0.2834, lr_0 = 3.0956e-04
Loss = 1.0380e-02, PNorm = 178.5033, GNorm = 0.1123, lr_0 = 3.0935e-04
Loss = 2.1185e-02, PNorm = 178.5187, GNorm = 2.6293, lr_0 = 3.0914e-04
Loss = 1.0638e-02, PNorm = 178.5310, GNorm = 0.1170, lr_0 = 3.0892e-04
Loss = 1.2205e-02, PNorm = 178.5434, GNorm = 0.4515, lr_0 = 3.0871e-04
Loss = 1.0355e-02, PNorm = 178.5571, GNorm = 0.3722, lr_0 = 3.0850e-04
Loss = 9.0025e-03, PNorm = 178.5668, GNorm = 0.1841, lr_0 = 3.0829e-04
Loss = 8.6190e-03, PNorm = 178.5812, GNorm = 0.2208, lr_0 = 3.0808e-04
Loss = 1.3730e-02, PNorm = 178.5934, GNorm = 0.2970, lr_0 = 3.0787e-04
Loss = 1.2514e-02, PNorm = 178.6036, GNorm = 0.3162, lr_0 = 3.0766e-04
Loss = 1.3044e-02, PNorm = 178.6142, GNorm = 0.1931, lr_0 = 3.0745e-04
Loss = 1.1631e-02, PNorm = 178.6275, GNorm = 0.4929, lr_0 = 3.0723e-04
Loss = 1.0025e-02, PNorm = 178.6372, GNorm = 0.3365, lr_0 = 3.0702e-04
Loss = 9.5933e-03, PNorm = 178.6463, GNorm = 0.5446, lr_0 = 3.0681e-04
Loss = 8.3976e-03, PNorm = 178.6590, GNorm = 0.2186, lr_0 = 3.0660e-04
Loss = 8.6245e-03, PNorm = 178.6684, GNorm = 0.1349, lr_0 = 3.0639e-04
Loss = 1.2661e-02, PNorm = 178.6765, GNorm = 0.4045, lr_0 = 3.0618e-04
Loss = 1.7144e-02, PNorm = 178.6910, GNorm = 0.5308, lr_0 = 3.0597e-04
Loss = 1.5447e-02, PNorm = 178.6991, GNorm = 0.2887, lr_0 = 3.0576e-04
Loss = 1.1626e-02, PNorm = 178.7124, GNorm = 0.1362, lr_0 = 3.0555e-04
Loss = 1.1966e-02, PNorm = 178.7259, GNorm = 0.2541, lr_0 = 3.0535e-04
Loss = 1.1558e-02, PNorm = 178.7370, GNorm = 0.2370, lr_0 = 3.0514e-04
Loss = 1.0537e-02, PNorm = 178.7448, GNorm = 0.2439, lr_0 = 3.0493e-04
Loss = 9.4097e-03, PNorm = 178.7536, GNorm = 0.1562, lr_0 = 3.0472e-04
Loss = 1.0349e-02, PNorm = 178.7611, GNorm = 0.3000, lr_0 = 3.0451e-04
Loss = 7.4381e-03, PNorm = 178.7709, GNorm = 0.1621, lr_0 = 3.0430e-04
Loss = 1.2496e-02, PNorm = 178.7838, GNorm = 0.3336, lr_0 = 3.0409e-04
Loss = 9.9057e-03, PNorm = 178.7952, GNorm = 0.5167, lr_0 = 3.0388e-04
Loss = 9.0703e-03, PNorm = 178.8064, GNorm = 0.1233, lr_0 = 3.0368e-04
Loss = 1.1803e-02, PNorm = 178.8177, GNorm = 0.1210, lr_0 = 3.0347e-04
Loss = 1.2199e-02, PNorm = 178.8262, GNorm = 0.2078, lr_0 = 3.0326e-04
Loss = 1.3217e-02, PNorm = 178.8356, GNorm = 0.2949, lr_0 = 3.0305e-04
Loss = 8.9723e-03, PNorm = 178.8499, GNorm = 0.2515, lr_0 = 3.0284e-04
Loss = 7.8453e-03, PNorm = 178.8632, GNorm = 0.2647, lr_0 = 3.0264e-04
Loss = 9.2456e-03, PNorm = 178.8730, GNorm = 0.2909, lr_0 = 3.0243e-04
Loss = 1.2275e-02, PNorm = 178.8824, GNorm = 0.2548, lr_0 = 3.0222e-04
Loss = 1.0045e-02, PNorm = 178.8922, GNorm = 0.1257, lr_0 = 3.0202e-04
Loss = 9.1335e-03, PNorm = 178.9022, GNorm = 0.2177, lr_0 = 3.0181e-04
Loss = 6.8116e-03, PNorm = 178.9129, GNorm = 0.1682, lr_0 = 3.0160e-04
Loss = 1.1195e-02, PNorm = 178.9244, GNorm = 0.3143, lr_0 = 3.0140e-04
Loss = 1.3798e-02, PNorm = 178.9337, GNorm = 0.2467, lr_0 = 3.0119e-04
Loss = 1.2700e-02, PNorm = 178.9481, GNorm = 0.1148, lr_0 = 3.0098e-04
Loss = 1.0801e-02, PNorm = 178.9599, GNorm = 0.1991, lr_0 = 3.0078e-04
Loss = 1.1700e-02, PNorm = 178.9710, GNorm = 0.3963, lr_0 = 3.0057e-04
Loss = 1.0444e-02, PNorm = 178.9826, GNorm = 0.3080, lr_0 = 3.0036e-04
Loss = 1.0553e-02, PNorm = 178.9939, GNorm = 0.3956, lr_0 = 3.0016e-04
Loss = 1.0640e-02, PNorm = 179.0066, GNorm = 0.1854, lr_0 = 2.9995e-04
Loss = 1.1068e-02, PNorm = 179.0185, GNorm = 0.3798, lr_0 = 2.9975e-04
Loss = 8.3290e-03, PNorm = 179.0308, GNorm = 0.1671, lr_0 = 2.9954e-04
Loss = 1.2546e-02, PNorm = 179.0426, GNorm = 0.1586, lr_0 = 2.9934e-04
Loss = 8.2514e-03, PNorm = 179.0534, GNorm = 0.1637, lr_0 = 2.9913e-04
Loss = 8.2161e-03, PNorm = 179.0628, GNorm = 0.1481, lr_0 = 2.9893e-04
Loss = 1.4404e-02, PNorm = 179.0702, GNorm = 0.3340, lr_0 = 2.9872e-04
Loss = 1.0076e-02, PNorm = 179.0796, GNorm = 0.2316, lr_0 = 2.9852e-04
Loss = 1.0809e-02, PNorm = 179.0929, GNorm = 0.3223, lr_0 = 2.9831e-04
Loss = 8.4320e-03, PNorm = 179.1052, GNorm = 0.1991, lr_0 = 2.9811e-04
Loss = 7.6112e-03, PNorm = 179.1161, GNorm = 0.2819, lr_0 = 2.9790e-04
Loss = 1.0003e-02, PNorm = 179.1276, GNorm = 0.2276, lr_0 = 2.9770e-04
Loss = 1.3762e-02, PNorm = 179.1375, GNorm = 0.2247, lr_0 = 2.9750e-04
Loss = 9.7204e-03, PNorm = 179.1468, GNorm = 0.2083, lr_0 = 2.9729e-04
Loss = 8.9192e-03, PNorm = 179.1554, GNorm = 0.2767, lr_0 = 2.9709e-04
Loss = 1.1833e-02, PNorm = 179.1663, GNorm = 0.4056, lr_0 = 2.9689e-04
Loss = 9.0436e-03, PNorm = 179.1762, GNorm = 0.4792, lr_0 = 2.9668e-04
Loss = 9.2555e-03, PNorm = 179.1888, GNorm = 0.1544, lr_0 = 2.9648e-04
Loss = 1.1709e-02, PNorm = 179.1989, GNorm = 0.2636, lr_0 = 2.9628e-04
Loss = 8.7277e-03, PNorm = 179.2108, GNorm = 0.3293, lr_0 = 2.9607e-04
Loss = 1.1883e-02, PNorm = 179.2226, GNorm = 0.3264, lr_0 = 2.9587e-04
Loss = 8.0091e-03, PNorm = 179.2332, GNorm = 0.1175, lr_0 = 2.9567e-04
Loss = 1.0705e-02, PNorm = 179.2438, GNorm = 0.2391, lr_0 = 2.9546e-04
Loss = 1.3140e-02, PNorm = 179.2552, GNorm = 0.1588, lr_0 = 2.9526e-04
Loss = 1.0101e-02, PNorm = 179.2679, GNorm = 0.2260, lr_0 = 2.9506e-04
Loss = 7.3821e-03, PNorm = 179.2803, GNorm = 0.2215, lr_0 = 2.9486e-04
Loss = 9.4829e-03, PNorm = 179.2924, GNorm = 0.2227, lr_0 = 2.9466e-04
Loss = 1.3239e-02, PNorm = 179.3048, GNorm = 0.1195, lr_0 = 2.9445e-04
Loss = 7.8720e-03, PNorm = 179.3150, GNorm = 0.1418, lr_0 = 2.9425e-04
Loss = 8.0373e-03, PNorm = 179.3282, GNorm = 0.1650, lr_0 = 2.9405e-04
Loss = 1.1414e-02, PNorm = 179.3393, GNorm = 0.2326, lr_0 = 2.9385e-04
Loss = 9.6824e-03, PNorm = 179.3551, GNorm = 0.3177, lr_0 = 2.9365e-04
Loss = 1.1788e-02, PNorm = 179.3660, GNorm = 0.1682, lr_0 = 2.9345e-04
Loss = 9.4847e-03, PNorm = 179.3763, GNorm = 0.2731, lr_0 = 2.9325e-04
Loss = 1.8508e-02, PNorm = 179.3865, GNorm = 0.2698, lr_0 = 2.9305e-04
Loss = 8.6676e-03, PNorm = 179.3972, GNorm = 0.2370, lr_0 = 2.9284e-04
Loss = 8.4062e-03, PNorm = 179.4067, GNorm = 0.1798, lr_0 = 2.9264e-04
Loss = 1.4248e-02, PNorm = 179.4172, GNorm = 0.2413, lr_0 = 2.9244e-04
Loss = 1.4692e-02, PNorm = 179.4302, GNorm = 0.2299, lr_0 = 2.9224e-04
Loss = 1.5774e-02, PNorm = 179.4421, GNorm = 0.1668, lr_0 = 2.9204e-04
Loss = 8.1680e-03, PNorm = 179.4518, GNorm = 0.1718, lr_0 = 2.9184e-04
Loss = 1.2788e-02, PNorm = 179.4643, GNorm = 0.2019, lr_0 = 2.9164e-04
Loss = 8.6430e-03, PNorm = 179.4776, GNorm = 0.2149, lr_0 = 2.9144e-04
Loss = 7.9527e-03, PNorm = 179.4902, GNorm = 0.4905, lr_0 = 2.9124e-04
Validation mae = 0.121654
Epoch 17
Loss = 8.1374e-03, PNorm = 179.4996, GNorm = 0.1587, lr_0 = 2.9104e-04
Loss = 8.7413e-03, PNorm = 179.5078, GNorm = 0.2920, lr_0 = 2.9084e-04
Loss = 9.4452e-03, PNorm = 179.5152, GNorm = 0.2397, lr_0 = 2.9065e-04
Loss = 7.4627e-03, PNorm = 179.5229, GNorm = 0.2195, lr_0 = 2.9045e-04
Loss = 1.1418e-02, PNorm = 179.5305, GNorm = 0.1837, lr_0 = 2.9025e-04
Loss = 7.8051e-03, PNorm = 179.5374, GNorm = 0.2570, lr_0 = 2.9005e-04
Loss = 9.4598e-03, PNorm = 179.5466, GNorm = 0.1706, lr_0 = 2.8985e-04
Loss = 9.3356e-03, PNorm = 179.5564, GNorm = 0.4340, lr_0 = 2.8965e-04
Loss = 8.2007e-03, PNorm = 179.5643, GNorm = 0.1513, lr_0 = 2.8945e-04
Loss = 6.2097e-03, PNorm = 179.5734, GNorm = 0.2345, lr_0 = 2.8925e-04
Loss = 8.9788e-03, PNorm = 179.5841, GNorm = 0.2162, lr_0 = 2.8906e-04
Loss = 6.5201e-03, PNorm = 179.5900, GNorm = 0.2533, lr_0 = 2.8886e-04
Loss = 9.2835e-03, PNorm = 179.5972, GNorm = 0.2217, lr_0 = 2.8866e-04
Loss = 8.6071e-03, PNorm = 179.6035, GNorm = 0.1518, lr_0 = 2.8846e-04
Loss = 8.7619e-03, PNorm = 179.6098, GNorm = 0.3122, lr_0 = 2.8826e-04
Loss = 8.5736e-03, PNorm = 179.6193, GNorm = 0.1454, lr_0 = 2.8807e-04
Loss = 6.8871e-03, PNorm = 179.6284, GNorm = 0.1451, lr_0 = 2.8787e-04
Loss = 9.1841e-03, PNorm = 179.6383, GNorm = 0.4593, lr_0 = 2.8767e-04
Loss = 1.2175e-02, PNorm = 179.6477, GNorm = 0.5434, lr_0 = 2.8748e-04
Loss = 9.5782e-03, PNorm = 179.6542, GNorm = 0.1821, lr_0 = 2.8728e-04
Loss = 6.9952e-03, PNorm = 179.6630, GNorm = 0.1488, lr_0 = 2.8708e-04
Loss = 8.2041e-03, PNorm = 179.6710, GNorm = 0.1441, lr_0 = 2.8689e-04
Loss = 6.9674e-03, PNorm = 179.6799, GNorm = 0.3617, lr_0 = 2.8669e-04
Loss = 7.4260e-03, PNorm = 179.6871, GNorm = 0.1595, lr_0 = 2.8649e-04
Loss = 1.4462e-02, PNorm = 179.6945, GNorm = 0.7409, lr_0 = 2.8630e-04
Loss = 6.5943e-03, PNorm = 179.7007, GNorm = 0.1688, lr_0 = 2.8610e-04
Loss = 1.1045e-02, PNorm = 179.7075, GNorm = 0.1507, lr_0 = 2.8590e-04
Loss = 1.0262e-02, PNorm = 179.7192, GNorm = 0.1229, lr_0 = 2.8571e-04
Loss = 1.0727e-02, PNorm = 179.7277, GNorm = 0.2218, lr_0 = 2.8551e-04
Loss = 1.0808e-02, PNorm = 179.7361, GNorm = 0.1702, lr_0 = 2.8532e-04
Loss = 7.8613e-03, PNorm = 179.7463, GNorm = 0.2518, lr_0 = 2.8512e-04
Loss = 7.5170e-03, PNorm = 179.7553, GNorm = 0.2388, lr_0 = 2.8493e-04
Loss = 8.1676e-03, PNorm = 179.7632, GNorm = 0.3220, lr_0 = 2.8473e-04
Loss = 9.7909e-03, PNorm = 179.7736, GNorm = 0.1389, lr_0 = 2.8454e-04
Loss = 6.6202e-03, PNorm = 179.7816, GNorm = 0.1243, lr_0 = 2.8434e-04
Loss = 8.6412e-03, PNorm = 179.7926, GNorm = 0.3080, lr_0 = 2.8415e-04
Loss = 1.0639e-02, PNorm = 179.8033, GNorm = 0.1198, lr_0 = 2.8395e-04
Loss = 9.3120e-03, PNorm = 179.8116, GNorm = 0.7560, lr_0 = 2.8376e-04
Loss = 9.5501e-03, PNorm = 179.8196, GNorm = 0.2434, lr_0 = 2.8356e-04
Loss = 6.9355e-03, PNorm = 179.8288, GNorm = 0.1322, lr_0 = 2.8337e-04
Loss = 1.1792e-02, PNorm = 179.8429, GNorm = 0.3389, lr_0 = 2.8317e-04
Loss = 8.7837e-03, PNorm = 179.8529, GNorm = 0.1320, lr_0 = 2.8298e-04
Loss = 7.4374e-03, PNorm = 179.8582, GNorm = 0.2510, lr_0 = 2.8279e-04
Loss = 7.8756e-03, PNorm = 179.8661, GNorm = 0.2263, lr_0 = 2.8259e-04
Loss = 1.1023e-02, PNorm = 179.8749, GNorm = 1.6529, lr_0 = 2.8240e-04
Loss = 1.1840e-02, PNorm = 179.8821, GNorm = 0.1329, lr_0 = 2.8221e-04
Loss = 1.0280e-02, PNorm = 179.8922, GNorm = 0.1481, lr_0 = 2.8201e-04
Loss = 1.5335e-02, PNorm = 179.8995, GNorm = 1.1409, lr_0 = 2.8182e-04
Loss = 9.1788e-03, PNorm = 179.9059, GNorm = 0.6609, lr_0 = 2.8163e-04
Loss = 8.6235e-03, PNorm = 179.9149, GNorm = 0.2572, lr_0 = 2.8143e-04
Loss = 6.7208e-03, PNorm = 179.9237, GNorm = 0.1760, lr_0 = 2.8124e-04
Loss = 1.0502e-02, PNorm = 179.9325, GNorm = 0.2269, lr_0 = 2.8105e-04
Loss = 7.2529e-03, PNorm = 179.9427, GNorm = 0.2494, lr_0 = 2.8085e-04
Loss = 8.5675e-03, PNorm = 179.9518, GNorm = 0.2583, lr_0 = 2.8066e-04
Loss = 1.3021e-02, PNorm = 179.9621, GNorm = 0.2589, lr_0 = 2.8047e-04
Loss = 1.0778e-02, PNorm = 179.9707, GNorm = 0.2979, lr_0 = 2.8028e-04
Loss = 1.0611e-02, PNorm = 179.9814, GNorm = 0.2981, lr_0 = 2.8009e-04
Loss = 8.1528e-03, PNorm = 179.9920, GNorm = 0.3837, lr_0 = 2.7989e-04
Loss = 1.9719e-02, PNorm = 180.0023, GNorm = 0.1464, lr_0 = 2.7970e-04
Loss = 1.1332e-02, PNorm = 180.0137, GNorm = 0.2218, lr_0 = 2.7951e-04
Loss = 8.1045e-03, PNorm = 180.0260, GNorm = 0.1885, lr_0 = 2.7932e-04
Loss = 9.3290e-03, PNorm = 180.0350, GNorm = 0.2957, lr_0 = 2.7913e-04
Loss = 1.1521e-02, PNorm = 180.0401, GNorm = 0.2877, lr_0 = 2.7894e-04
Loss = 7.3514e-03, PNorm = 180.0500, GNorm = 0.3348, lr_0 = 2.7875e-04
Loss = 9.4265e-03, PNorm = 180.0617, GNorm = 0.2692, lr_0 = 2.7855e-04
Loss = 7.3147e-03, PNorm = 180.0713, GNorm = 0.1406, lr_0 = 2.7836e-04
Loss = 8.2485e-03, PNorm = 180.0789, GNorm = 0.2730, lr_0 = 2.7817e-04
Loss = 1.0992e-02, PNorm = 180.0885, GNorm = 0.5031, lr_0 = 2.7798e-04
Loss = 6.5711e-03, PNorm = 180.0982, GNorm = 0.2727, lr_0 = 2.7779e-04
Loss = 6.1230e-03, PNorm = 180.1058, GNorm = 0.2853, lr_0 = 2.7760e-04
Loss = 9.6124e-03, PNorm = 180.1129, GNorm = 0.2110, lr_0 = 2.7741e-04
Loss = 8.0229e-03, PNorm = 180.1216, GNorm = 0.4391, lr_0 = 2.7722e-04
Loss = 7.0198e-03, PNorm = 180.1305, GNorm = 0.1447, lr_0 = 2.7703e-04
Loss = 1.0934e-02, PNorm = 180.1397, GNorm = 0.2853, lr_0 = 2.7684e-04
Loss = 6.4467e-03, PNorm = 180.1494, GNorm = 0.3559, lr_0 = 2.7665e-04
Loss = 9.5426e-03, PNorm = 180.1591, GNorm = 0.1624, lr_0 = 2.7646e-04
Loss = 9.0758e-03, PNorm = 180.1678, GNorm = 0.2918, lr_0 = 2.7627e-04
Loss = 1.0332e-02, PNorm = 180.1768, GNorm = 0.2016, lr_0 = 2.7608e-04
Loss = 6.0087e-03, PNorm = 180.1843, GNorm = 0.3249, lr_0 = 2.7590e-04
Loss = 6.2262e-03, PNorm = 180.1916, GNorm = 0.2294, lr_0 = 2.7571e-04
Loss = 7.1383e-03, PNorm = 180.2001, GNorm = 0.1524, lr_0 = 2.7552e-04
Loss = 6.4832e-03, PNorm = 180.2094, GNorm = 0.4498, lr_0 = 2.7533e-04
Loss = 8.8147e-03, PNorm = 180.2194, GNorm = 0.3520, lr_0 = 2.7514e-04
Loss = 1.1170e-02, PNorm = 180.2291, GNorm = 0.1089, lr_0 = 2.7495e-04
Loss = 9.4530e-03, PNorm = 180.2396, GNorm = 0.1155, lr_0 = 2.7476e-04
Loss = 8.1512e-03, PNorm = 180.2482, GNorm = 0.2114, lr_0 = 2.7457e-04
Loss = 6.8134e-03, PNorm = 180.2581, GNorm = 0.2163, lr_0 = 2.7439e-04
Loss = 1.1951e-02, PNorm = 180.2671, GNorm = 0.2361, lr_0 = 2.7420e-04
Loss = 7.6581e-03, PNorm = 180.2761, GNorm = 0.2096, lr_0 = 2.7401e-04
Loss = 1.3633e-02, PNorm = 180.2863, GNorm = 0.4328, lr_0 = 2.7382e-04
Loss = 6.5386e-03, PNorm = 180.2956, GNorm = 0.1097, lr_0 = 2.7364e-04
Loss = 1.0628e-02, PNorm = 180.3060, GNorm = 0.1066, lr_0 = 2.7345e-04
Loss = 7.2944e-03, PNorm = 180.3157, GNorm = 0.3386, lr_0 = 2.7326e-04
Loss = 6.7613e-03, PNorm = 180.3265, GNorm = 0.1564, lr_0 = 2.7307e-04
Loss = 8.9398e-03, PNorm = 180.3363, GNorm = 0.1685, lr_0 = 2.7289e-04
Loss = 7.5302e-03, PNorm = 180.3486, GNorm = 0.1685, lr_0 = 2.7270e-04
Loss = 8.2032e-03, PNorm = 180.3577, GNorm = 0.1448, lr_0 = 2.7251e-04
Loss = 6.9945e-03, PNorm = 180.3678, GNorm = 0.2005, lr_0 = 2.7233e-04
Loss = 8.0105e-03, PNorm = 180.3777, GNorm = 0.2932, lr_0 = 2.7214e-04
Loss = 6.8626e-03, PNorm = 180.3855, GNorm = 0.1163, lr_0 = 2.7195e-04
Loss = 8.7891e-03, PNorm = 180.3937, GNorm = 0.2297, lr_0 = 2.7177e-04
Loss = 1.2370e-02, PNorm = 180.4051, GNorm = 0.1651, lr_0 = 2.7158e-04
Loss = 8.6608e-03, PNorm = 180.4154, GNorm = 0.2629, lr_0 = 2.7139e-04
Loss = 8.1569e-03, PNorm = 180.4281, GNorm = 0.2605, lr_0 = 2.7121e-04
Loss = 1.2903e-02, PNorm = 180.4403, GNorm = 0.2556, lr_0 = 2.7102e-04
Loss = 6.6942e-03, PNorm = 180.4496, GNorm = 0.3457, lr_0 = 2.7084e-04
Loss = 8.7518e-03, PNorm = 180.4598, GNorm = 0.1522, lr_0 = 2.7065e-04
Loss = 7.4587e-03, PNorm = 180.4686, GNorm = 0.3114, lr_0 = 2.7047e-04
Loss = 6.4306e-03, PNorm = 180.4772, GNorm = 0.1908, lr_0 = 2.7028e-04
Loss = 1.3946e-02, PNorm = 180.4864, GNorm = 0.1871, lr_0 = 2.7010e-04
Loss = 8.4706e-03, PNorm = 180.4941, GNorm = 0.2071, lr_0 = 2.6991e-04
Loss = 8.2964e-03, PNorm = 180.5016, GNorm = 0.1853, lr_0 = 2.6973e-04
Loss = 7.8624e-03, PNorm = 180.5113, GNorm = 0.2290, lr_0 = 2.6954e-04
Loss = 1.1150e-02, PNorm = 180.5218, GNorm = 0.2387, lr_0 = 2.6936e-04
Loss = 9.2336e-03, PNorm = 180.5314, GNorm = 0.1027, lr_0 = 2.6917e-04
Loss = 2.7001e-02, PNorm = 180.5420, GNorm = 0.5392, lr_0 = 2.6899e-04
Loss = 9.1987e-03, PNorm = 180.5497, GNorm = 0.2050, lr_0 = 2.6880e-04
Loss = 1.6255e-02, PNorm = 180.5591, GNorm = 0.2039, lr_0 = 2.6862e-04
Loss = 9.9130e-03, PNorm = 180.5694, GNorm = 0.2682, lr_0 = 2.6844e-04
Loss = 6.6502e-03, PNorm = 180.5776, GNorm = 0.2113, lr_0 = 2.6825e-04
Validation mae = 0.121506
Epoch 18
Loss = 1.0657e-02, PNorm = 180.5846, GNorm = 0.1612, lr_0 = 2.6807e-04
Loss = 7.5694e-03, PNorm = 180.5937, GNorm = 0.1618, lr_0 = 2.6788e-04
Loss = 9.5985e-03, PNorm = 180.6027, GNorm = 0.2430, lr_0 = 2.6770e-04
Loss = 7.7175e-03, PNorm = 180.6112, GNorm = 0.1734, lr_0 = 2.6752e-04
Loss = 7.0132e-03, PNorm = 180.6169, GNorm = 0.4412, lr_0 = 2.6733e-04
Loss = 5.7080e-03, PNorm = 180.6210, GNorm = 0.1250, lr_0 = 2.6715e-04
Loss = 6.4484e-03, PNorm = 180.6271, GNorm = 0.4496, lr_0 = 2.6697e-04
Loss = 7.9285e-03, PNorm = 180.6318, GNorm = 0.2574, lr_0 = 2.6678e-04
Loss = 5.8173e-03, PNorm = 180.6358, GNorm = 0.1167, lr_0 = 2.6660e-04
Loss = 9.6637e-03, PNorm = 180.6441, GNorm = 0.3677, lr_0 = 2.6642e-04
Loss = 6.4259e-03, PNorm = 180.6513, GNorm = 0.0954, lr_0 = 2.6624e-04
Loss = 8.2847e-03, PNorm = 180.6605, GNorm = 0.3322, lr_0 = 2.6605e-04
Loss = 9.5480e-03, PNorm = 180.6669, GNorm = 0.1740, lr_0 = 2.6587e-04
Loss = 7.3196e-03, PNorm = 180.6729, GNorm = 0.1762, lr_0 = 2.6569e-04
Loss = 8.0712e-03, PNorm = 180.6791, GNorm = 0.1731, lr_0 = 2.6551e-04
Loss = 5.5491e-03, PNorm = 180.6858, GNorm = 0.2949, lr_0 = 2.6533e-04
Loss = 6.1512e-03, PNorm = 180.6929, GNorm = 0.1604, lr_0 = 2.6514e-04
Loss = 1.2557e-02, PNorm = 180.6995, GNorm = 0.2373, lr_0 = 2.6496e-04
Loss = 6.3708e-03, PNorm = 180.7059, GNorm = 0.2636, lr_0 = 2.6478e-04
Loss = 6.8472e-03, PNorm = 180.7143, GNorm = 0.1246, lr_0 = 2.6460e-04
Loss = 8.6265e-03, PNorm = 180.7212, GNorm = 0.1775, lr_0 = 2.6442e-04
Loss = 9.6025e-03, PNorm = 180.7291, GNorm = 0.2804, lr_0 = 2.6424e-04
Loss = 1.0080e-02, PNorm = 180.7372, GNorm = 0.2111, lr_0 = 2.6406e-04
Loss = 7.1290e-03, PNorm = 180.7435, GNorm = 0.2668, lr_0 = 2.6388e-04
Loss = 8.1693e-03, PNorm = 180.7488, GNorm = 0.2404, lr_0 = 2.6369e-04
Loss = 5.6364e-03, PNorm = 180.7564, GNorm = 0.2366, lr_0 = 2.6351e-04
Loss = 8.1252e-03, PNorm = 180.7642, GNorm = 0.1569, lr_0 = 2.6333e-04
Loss = 7.7787e-03, PNorm = 180.7713, GNorm = 0.2092, lr_0 = 2.6315e-04
Loss = 1.1039e-02, PNorm = 180.7756, GNorm = 0.1046, lr_0 = 2.6297e-04
Loss = 7.4399e-03, PNorm = 180.7823, GNorm = 0.6124, lr_0 = 2.6279e-04
Loss = 8.9769e-03, PNorm = 180.7899, GNorm = 0.1390, lr_0 = 2.6261e-04
Loss = 8.3291e-03, PNorm = 180.7953, GNorm = 0.1268, lr_0 = 2.6243e-04
Loss = 1.0273e-02, PNorm = 180.8023, GNorm = 0.1491, lr_0 = 2.6225e-04
Loss = 5.9165e-03, PNorm = 180.8090, GNorm = 0.1926, lr_0 = 2.6207e-04
Loss = 5.7700e-03, PNorm = 180.8119, GNorm = 0.2245, lr_0 = 2.6189e-04
Loss = 7.3362e-03, PNorm = 180.8180, GNorm = 0.3210, lr_0 = 2.6171e-04
Loss = 8.5157e-03, PNorm = 180.8250, GNorm = 0.3038, lr_0 = 2.6153e-04
Loss = 6.6284e-03, PNorm = 180.8300, GNorm = 0.3008, lr_0 = 2.6136e-04
Loss = 9.0212e-03, PNorm = 180.8386, GNorm = 0.2708, lr_0 = 2.6118e-04
Loss = 7.7511e-03, PNorm = 180.8476, GNorm = 0.1346, lr_0 = 2.6100e-04
Loss = 5.7468e-03, PNorm = 180.8574, GNorm = 0.1352, lr_0 = 2.6082e-04
Loss = 5.7335e-03, PNorm = 180.8648, GNorm = 0.2740, lr_0 = 2.6064e-04
Loss = 8.7773e-03, PNorm = 180.8697, GNorm = 0.3111, lr_0 = 2.6046e-04
Loss = 1.3187e-02, PNorm = 180.8755, GNorm = 0.1191, lr_0 = 2.6028e-04
Loss = 6.5893e-03, PNorm = 180.8844, GNorm = 0.2201, lr_0 = 2.6011e-04
Loss = 7.1660e-03, PNorm = 180.8910, GNorm = 0.2056, lr_0 = 2.5993e-04
Loss = 7.1572e-03, PNorm = 180.8953, GNorm = 0.2308, lr_0 = 2.5975e-04
Loss = 8.2346e-03, PNorm = 180.9022, GNorm = 0.1391, lr_0 = 2.5957e-04
Loss = 1.1125e-02, PNorm = 180.9128, GNorm = 0.1878, lr_0 = 2.5939e-04
Loss = 5.0081e-03, PNorm = 180.9207, GNorm = 0.2095, lr_0 = 2.5922e-04
Loss = 5.6646e-03, PNorm = 180.9306, GNorm = 0.3990, lr_0 = 2.5904e-04
Loss = 5.4402e-03, PNorm = 180.9386, GNorm = 0.1740, lr_0 = 2.5886e-04
Loss = 4.6659e-03, PNorm = 180.9474, GNorm = 0.1451, lr_0 = 2.5868e-04
Loss = 5.5179e-03, PNorm = 180.9574, GNorm = 0.2694, lr_0 = 2.5851e-04
Loss = 9.6483e-03, PNorm = 180.9678, GNorm = 0.3224, lr_0 = 2.5833e-04
Loss = 7.0441e-03, PNorm = 180.9751, GNorm = 0.1360, lr_0 = 2.5815e-04
Loss = 7.6280e-03, PNorm = 180.9830, GNorm = 0.0685, lr_0 = 2.5797e-04
Loss = 5.6277e-03, PNorm = 180.9906, GNorm = 0.1350, lr_0 = 2.5780e-04
Loss = 8.3791e-03, PNorm = 180.9981, GNorm = 0.1564, lr_0 = 2.5762e-04
Loss = 7.0456e-03, PNorm = 181.0085, GNorm = 0.0946, lr_0 = 2.5745e-04
Loss = 6.2568e-03, PNorm = 181.0155, GNorm = 0.1087, lr_0 = 2.5727e-04
Loss = 7.7787e-03, PNorm = 181.0197, GNorm = 0.1946, lr_0 = 2.5709e-04
Loss = 5.7358e-03, PNorm = 181.0263, GNorm = 0.3135, lr_0 = 2.5692e-04
Loss = 1.6373e-02, PNorm = 181.0365, GNorm = 0.2614, lr_0 = 2.5674e-04
Loss = 7.1889e-03, PNorm = 181.0444, GNorm = 0.2708, lr_0 = 2.5656e-04
Loss = 8.7260e-03, PNorm = 181.0490, GNorm = 0.1908, lr_0 = 2.5639e-04
Loss = 7.5227e-03, PNorm = 181.0580, GNorm = 0.1432, lr_0 = 2.5621e-04
Loss = 5.8923e-03, PNorm = 181.0646, GNorm = 0.4173, lr_0 = 2.5604e-04
Loss = 6.7568e-03, PNorm = 181.0714, GNorm = 0.2115, lr_0 = 2.5586e-04
Loss = 7.0352e-03, PNorm = 181.0790, GNorm = 0.4290, lr_0 = 2.5569e-04
Loss = 9.8275e-03, PNorm = 181.0880, GNorm = 0.4603, lr_0 = 2.5551e-04
Loss = 8.4531e-03, PNorm = 181.0991, GNorm = 0.1078, lr_0 = 2.5534e-04
Loss = 6.6507e-03, PNorm = 181.1095, GNorm = 0.4140, lr_0 = 2.5516e-04
Loss = 8.1975e-03, PNorm = 181.1180, GNorm = 0.3881, lr_0 = 2.5499e-04
Loss = 7.6759e-03, PNorm = 181.1266, GNorm = 0.5246, lr_0 = 2.5481e-04
Loss = 6.3374e-03, PNorm = 181.1337, GNorm = 0.3639, lr_0 = 2.5464e-04
Loss = 5.8123e-03, PNorm = 181.1406, GNorm = 0.1519, lr_0 = 2.5446e-04
Loss = 4.6996e-03, PNorm = 181.1466, GNorm = 0.1415, lr_0 = 2.5429e-04
Loss = 9.2773e-03, PNorm = 181.1556, GNorm = 0.1597, lr_0 = 2.5411e-04
Loss = 6.2628e-03, PNorm = 181.1646, GNorm = 0.1215, lr_0 = 2.5394e-04
Loss = 5.1053e-03, PNorm = 181.1713, GNorm = 0.2489, lr_0 = 2.5377e-04
Loss = 1.2490e-02, PNorm = 181.1773, GNorm = 0.2716, lr_0 = 2.5359e-04
Loss = 7.4127e-03, PNorm = 181.1829, GNorm = 0.3243, lr_0 = 2.5342e-04
Loss = 5.0087e-03, PNorm = 181.1895, GNorm = 0.1946, lr_0 = 2.5325e-04
Loss = 7.3718e-03, PNorm = 181.1966, GNorm = 0.2187, lr_0 = 2.5307e-04
Loss = 6.5850e-03, PNorm = 181.2040, GNorm = 0.1071, lr_0 = 2.5290e-04
Loss = 5.9307e-03, PNorm = 181.2100, GNorm = 0.2607, lr_0 = 2.5273e-04
Loss = 5.4949e-03, PNorm = 181.2161, GNorm = 0.2095, lr_0 = 2.5255e-04
Loss = 1.2661e-02, PNorm = 181.2234, GNorm = 0.4589, lr_0 = 2.5238e-04
Loss = 7.7486e-03, PNorm = 181.2307, GNorm = 0.1288, lr_0 = 2.5221e-04
Loss = 8.9571e-03, PNorm = 181.2384, GNorm = 0.1854, lr_0 = 2.5203e-04
Loss = 5.6335e-03, PNorm = 181.2466, GNorm = 0.3187, lr_0 = 2.5186e-04
Loss = 1.2776e-02, PNorm = 181.2538, GNorm = 0.2424, lr_0 = 2.5169e-04
Loss = 6.4549e-03, PNorm = 181.2602, GNorm = 0.2742, lr_0 = 2.5152e-04
Loss = 1.1113e-02, PNorm = 181.2686, GNorm = 0.1200, lr_0 = 2.5134e-04
Loss = 1.0390e-02, PNorm = 181.2762, GNorm = 0.2720, lr_0 = 2.5117e-04
Loss = 7.8168e-03, PNorm = 181.2815, GNorm = 0.1634, lr_0 = 2.5100e-04
Loss = 9.3086e-03, PNorm = 181.2875, GNorm = 0.1606, lr_0 = 2.5083e-04
Loss = 9.3426e-03, PNorm = 181.2914, GNorm = 0.3295, lr_0 = 2.5066e-04
Loss = 7.9747e-03, PNorm = 181.2989, GNorm = 0.1602, lr_0 = 2.5048e-04
Loss = 1.1890e-02, PNorm = 181.3057, GNorm = 0.1142, lr_0 = 2.5031e-04
Loss = 7.9146e-03, PNorm = 181.3126, GNorm = 0.6072, lr_0 = 2.5014e-04
Loss = 9.1318e-03, PNorm = 181.3178, GNorm = 0.1738, lr_0 = 2.4997e-04
Loss = 8.5836e-03, PNorm = 181.3250, GNorm = 0.3613, lr_0 = 2.4980e-04
Loss = 1.0771e-02, PNorm = 181.3356, GNorm = 0.2298, lr_0 = 2.4963e-04
Loss = 1.1497e-02, PNorm = 181.3433, GNorm = 0.3089, lr_0 = 2.4946e-04
Loss = 8.4591e-03, PNorm = 181.3547, GNorm = 0.2431, lr_0 = 2.4929e-04
Loss = 5.0589e-03, PNorm = 181.3657, GNorm = 0.1392, lr_0 = 2.4911e-04
Loss = 1.0890e-02, PNorm = 181.3725, GNorm = 0.1284, lr_0 = 2.4894e-04
Loss = 6.8250e-03, PNorm = 181.3834, GNorm = 0.3393, lr_0 = 2.4877e-04
Loss = 5.9150e-03, PNorm = 181.3931, GNorm = 0.1417, lr_0 = 2.4860e-04
Loss = 6.2345e-03, PNorm = 181.4002, GNorm = 0.1226, lr_0 = 2.4843e-04
Loss = 8.5663e-03, PNorm = 181.4085, GNorm = 0.1420, lr_0 = 2.4826e-04
Loss = 9.6587e-03, PNorm = 181.4162, GNorm = 0.1818, lr_0 = 2.4809e-04
Loss = 1.4662e-02, PNorm = 181.4218, GNorm = 0.1513, lr_0 = 2.4792e-04
Loss = 1.6216e-02, PNorm = 181.4287, GNorm = 0.2988, lr_0 = 2.4775e-04
Loss = 8.9824e-03, PNorm = 181.4390, GNorm = 0.1275, lr_0 = 2.4758e-04
Loss = 1.0958e-02, PNorm = 181.4478, GNorm = 0.1293, lr_0 = 2.4741e-04
Loss = 6.7162e-03, PNorm = 181.4567, GNorm = 0.5767, lr_0 = 2.4724e-04
Loss = 6.2535e-03, PNorm = 181.4652, GNorm = 0.1316, lr_0 = 2.4707e-04
Validation mae = 0.121366
Epoch 19
Loss = 3.9336e-03, PNorm = 181.4716, GNorm = 0.2795, lr_0 = 2.4690e-04
Loss = 2.0953e-02, PNorm = 181.4793, GNorm = 0.3575, lr_0 = 2.4674e-04
Loss = 9.3911e-03, PNorm = 181.4843, GNorm = 0.1834, lr_0 = 2.4657e-04
Loss = 5.4731e-03, PNorm = 181.4903, GNorm = 0.2410, lr_0 = 2.4640e-04
Loss = 6.1306e-03, PNorm = 181.4955, GNorm = 0.1275, lr_0 = 2.4623e-04
Loss = 4.6742e-03, PNorm = 181.4992, GNorm = 0.1779, lr_0 = 2.4606e-04
Loss = 4.7692e-03, PNorm = 181.5019, GNorm = 0.3820, lr_0 = 2.4589e-04
Loss = 5.1227e-03, PNorm = 181.5071, GNorm = 0.1561, lr_0 = 2.4572e-04
Loss = 9.4990e-03, PNorm = 181.5120, GNorm = 0.1287, lr_0 = 2.4556e-04
Loss = 7.4706e-03, PNorm = 181.5182, GNorm = 0.2529, lr_0 = 2.4539e-04
Loss = 6.9655e-03, PNorm = 181.5261, GNorm = 0.1427, lr_0 = 2.4522e-04
Loss = 8.2113e-03, PNorm = 181.5315, GNorm = 0.1159, lr_0 = 2.4505e-04
Loss = 8.6626e-03, PNorm = 181.5374, GNorm = 0.3389, lr_0 = 2.4488e-04
Loss = 1.1150e-02, PNorm = 181.5431, GNorm = 1.9091, lr_0 = 2.4472e-04
Loss = 6.6792e-03, PNorm = 181.5501, GNorm = 0.3459, lr_0 = 2.4455e-04
Loss = 4.7407e-03, PNorm = 181.5571, GNorm = 0.2262, lr_0 = 2.4438e-04
Loss = 6.7069e-03, PNorm = 181.5659, GNorm = 0.3991, lr_0 = 2.4421e-04
Loss = 5.4827e-03, PNorm = 181.5731, GNorm = 0.1847, lr_0 = 2.4405e-04
Loss = 7.9032e-03, PNorm = 181.5819, GNorm = 0.2131, lr_0 = 2.4388e-04
Loss = 7.3965e-03, PNorm = 181.5906, GNorm = 0.1346, lr_0 = 2.4371e-04
Loss = 7.9120e-03, PNorm = 181.5965, GNorm = 0.1264, lr_0 = 2.4354e-04
Loss = 6.1878e-03, PNorm = 181.6042, GNorm = 0.2238, lr_0 = 2.4338e-04
Loss = 9.0038e-03, PNorm = 181.6085, GNorm = 0.1582, lr_0 = 2.4321e-04
Loss = 7.3362e-03, PNorm = 181.6134, GNorm = 0.2779, lr_0 = 2.4304e-04
Loss = 6.7174e-03, PNorm = 181.6194, GNorm = 0.2645, lr_0 = 2.4288e-04
Loss = 5.5608e-03, PNorm = 181.6267, GNorm = 0.1844, lr_0 = 2.4271e-04
Loss = 6.8761e-03, PNorm = 181.6331, GNorm = 0.3450, lr_0 = 2.4254e-04
Loss = 7.0123e-03, PNorm = 181.6366, GNorm = 0.1117, lr_0 = 2.4238e-04
Loss = 4.5852e-03, PNorm = 181.6433, GNorm = 0.1314, lr_0 = 2.4221e-04
Loss = 6.5052e-03, PNorm = 181.6512, GNorm = 0.1805, lr_0 = 2.4205e-04
Loss = 4.9412e-03, PNorm = 181.6564, GNorm = 0.2092, lr_0 = 2.4188e-04
Loss = 5.4913e-03, PNorm = 181.6619, GNorm = 0.3759, lr_0 = 2.4171e-04
Loss = 1.0801e-02, PNorm = 181.6711, GNorm = 0.4518, lr_0 = 2.4155e-04
Loss = 5.0363e-03, PNorm = 181.6764, GNorm = 0.1361, lr_0 = 2.4138e-04
Loss = 1.1361e-02, PNorm = 181.6825, GNorm = 0.5038, lr_0 = 2.4122e-04
Loss = 7.8047e-03, PNorm = 181.6893, GNorm = 0.1360, lr_0 = 2.4105e-04
Loss = 5.2345e-03, PNorm = 181.6948, GNorm = 0.1293, lr_0 = 2.4089e-04
Loss = 6.5621e-03, PNorm = 181.6993, GNorm = 0.1126, lr_0 = 2.4072e-04
Loss = 5.9839e-03, PNorm = 181.7025, GNorm = 0.4502, lr_0 = 2.4056e-04
Loss = 1.0584e-02, PNorm = 181.7108, GNorm = 0.1120, lr_0 = 2.4039e-04
Loss = 8.0912e-03, PNorm = 181.7178, GNorm = 0.1371, lr_0 = 2.4023e-04
Loss = 5.2160e-03, PNorm = 181.7231, GNorm = 0.1071, lr_0 = 2.4006e-04
Loss = 4.2233e-03, PNorm = 181.7284, GNorm = 0.1499, lr_0 = 2.3990e-04
Loss = 4.7423e-03, PNorm = 181.7333, GNorm = 0.0986, lr_0 = 2.3974e-04
Loss = 5.7250e-03, PNorm = 181.7407, GNorm = 0.2131, lr_0 = 2.3957e-04
Loss = 9.0746e-03, PNorm = 181.7472, GNorm = 0.2190, lr_0 = 2.3941e-04
Loss = 5.1552e-03, PNorm = 181.7522, GNorm = 0.0829, lr_0 = 2.3924e-04
Loss = 8.4449e-03, PNorm = 181.7591, GNorm = 0.1912, lr_0 = 2.3908e-04
Loss = 7.2248e-03, PNorm = 181.7655, GNorm = 0.1325, lr_0 = 2.3892e-04
Loss = 4.5384e-03, PNorm = 181.7724, GNorm = 0.2339, lr_0 = 2.3875e-04
Loss = 7.1431e-03, PNorm = 181.7775, GNorm = 0.1627, lr_0 = 2.3859e-04
Loss = 1.5281e-02, PNorm = 181.7846, GNorm = 0.2533, lr_0 = 2.3842e-04
Loss = 7.9546e-03, PNorm = 181.7896, GNorm = 0.2886, lr_0 = 2.3826e-04
Loss = 4.7653e-03, PNorm = 181.7941, GNorm = 0.2273, lr_0 = 2.3810e-04
Loss = 5.2187e-03, PNorm = 181.7997, GNorm = 0.2027, lr_0 = 2.3794e-04
Loss = 9.9559e-03, PNorm = 181.8058, GNorm = 0.1690, lr_0 = 2.3777e-04
Loss = 5.2088e-03, PNorm = 181.8108, GNorm = 0.1181, lr_0 = 2.3761e-04
Loss = 6.7468e-03, PNorm = 181.8175, GNorm = 0.1666, lr_0 = 2.3745e-04
Loss = 5.1268e-03, PNorm = 181.8254, GNorm = 0.1051, lr_0 = 2.3728e-04
Loss = 1.0115e-02, PNorm = 181.8330, GNorm = 0.1027, lr_0 = 2.3712e-04
Loss = 6.1180e-03, PNorm = 181.8413, GNorm = 0.1745, lr_0 = 2.3696e-04
Loss = 4.6682e-03, PNorm = 181.8478, GNorm = 0.0903, lr_0 = 2.3680e-04
Loss = 6.7098e-03, PNorm = 181.8554, GNorm = 0.2056, lr_0 = 2.3663e-04
Loss = 8.5499e-03, PNorm = 181.8619, GNorm = 0.3300, lr_0 = 2.3647e-04
Loss = 6.8204e-03, PNorm = 181.8657, GNorm = 0.2899, lr_0 = 2.3631e-04
Loss = 5.9002e-03, PNorm = 181.8736, GNorm = 0.1431, lr_0 = 2.3615e-04
Loss = 1.0208e-02, PNorm = 181.8782, GNorm = 0.1557, lr_0 = 2.3599e-04
Loss = 7.2975e-03, PNorm = 181.8850, GNorm = 0.1568, lr_0 = 2.3582e-04
Loss = 1.0012e-02, PNorm = 181.8918, GNorm = 0.1103, lr_0 = 2.3566e-04
Loss = 9.7671e-03, PNorm = 181.8993, GNorm = 0.2693, lr_0 = 2.3550e-04
Loss = 5.0797e-03, PNorm = 181.9064, GNorm = 0.1730, lr_0 = 2.3534e-04
Loss = 5.4443e-03, PNorm = 181.9128, GNorm = 0.0989, lr_0 = 2.3518e-04
Loss = 5.4920e-03, PNorm = 181.9198, GNorm = 0.1393, lr_0 = 2.3502e-04
Loss = 6.5166e-03, PNorm = 181.9284, GNorm = 0.5846, lr_0 = 2.3486e-04
Loss = 7.9580e-03, PNorm = 181.9345, GNorm = 0.5087, lr_0 = 2.3470e-04
Loss = 7.5543e-03, PNorm = 181.9411, GNorm = 0.2644, lr_0 = 2.3454e-04
Loss = 7.4116e-03, PNorm = 181.9500, GNorm = 0.3269, lr_0 = 2.3437e-04
Loss = 7.0422e-03, PNorm = 181.9548, GNorm = 0.1783, lr_0 = 2.3421e-04
Loss = 6.1022e-03, PNorm = 181.9574, GNorm = 0.0978, lr_0 = 2.3405e-04
Loss = 1.3403e-02, PNorm = 181.9614, GNorm = 0.3690, lr_0 = 2.3389e-04
Loss = 6.3123e-03, PNorm = 181.9680, GNorm = 0.1455, lr_0 = 2.3373e-04
Loss = 4.3956e-03, PNorm = 181.9763, GNorm = 0.0946, lr_0 = 2.3357e-04
Loss = 1.2518e-02, PNorm = 181.9844, GNorm = 0.1512, lr_0 = 2.3341e-04
Loss = 6.9910e-03, PNorm = 181.9901, GNorm = 0.1966, lr_0 = 2.3325e-04
Loss = 1.2229e-02, PNorm = 181.9971, GNorm = 0.4045, lr_0 = 2.3309e-04
Loss = 9.7544e-03, PNorm = 182.0041, GNorm = 0.7894, lr_0 = 2.3293e-04
Loss = 9.9314e-03, PNorm = 182.0087, GNorm = 0.1622, lr_0 = 2.3277e-04
Loss = 5.7177e-03, PNorm = 182.0156, GNorm = 0.1638, lr_0 = 2.3261e-04
Loss = 4.9701e-03, PNorm = 182.0200, GNorm = 0.1350, lr_0 = 2.3246e-04
Loss = 5.7233e-03, PNorm = 182.0255, GNorm = 0.1382, lr_0 = 2.3230e-04
Loss = 8.5055e-03, PNorm = 182.0323, GNorm = 0.3287, lr_0 = 2.3214e-04
Loss = 9.0381e-03, PNorm = 182.0399, GNorm = 0.1283, lr_0 = 2.3198e-04
Loss = 4.6369e-03, PNorm = 182.0465, GNorm = 0.2643, lr_0 = 2.3182e-04
Loss = 6.2107e-03, PNorm = 182.0538, GNorm = 0.0800, lr_0 = 2.3166e-04
Loss = 1.3698e-02, PNorm = 182.0606, GNorm = 0.3294, lr_0 = 2.3150e-04
Loss = 5.4826e-03, PNorm = 182.0675, GNorm = 0.1412, lr_0 = 2.3134e-04
Loss = 6.9902e-03, PNorm = 182.0763, GNorm = 0.1211, lr_0 = 2.3118e-04
Loss = 1.1501e-02, PNorm = 182.0852, GNorm = 1.1309, lr_0 = 2.3103e-04
Loss = 7.3339e-03, PNorm = 182.0922, GNorm = 0.1612, lr_0 = 2.3087e-04
Loss = 6.8914e-03, PNorm = 182.0982, GNorm = 0.1630, lr_0 = 2.3071e-04
Loss = 4.5990e-03, PNorm = 182.1041, GNorm = 0.1203, lr_0 = 2.3055e-04
Loss = 6.1912e-03, PNorm = 182.1109, GNorm = 0.2105, lr_0 = 2.3039e-04
Loss = 1.0964e-02, PNorm = 182.1185, GNorm = 1.3661, lr_0 = 2.3024e-04
Loss = 5.7651e-03, PNorm = 182.1235, GNorm = 0.1306, lr_0 = 2.3008e-04
Loss = 6.1342e-03, PNorm = 182.1305, GNorm = 0.4048, lr_0 = 2.2992e-04
Loss = 6.8654e-03, PNorm = 182.1368, GNorm = 0.1899, lr_0 = 2.2976e-04
Loss = 4.7910e-03, PNorm = 182.1445, GNorm = 0.1915, lr_0 = 2.2961e-04
Loss = 4.5638e-03, PNorm = 182.1528, GNorm = 0.1397, lr_0 = 2.2945e-04
Loss = 5.5023e-03, PNorm = 182.1623, GNorm = 0.2712, lr_0 = 2.2929e-04
Loss = 6.9340e-03, PNorm = 182.1739, GNorm = 0.0897, lr_0 = 2.2913e-04
Loss = 9.6578e-03, PNorm = 182.1844, GNorm = 0.2310, lr_0 = 2.2898e-04
Loss = 4.3520e-03, PNorm = 182.1897, GNorm = 0.1097, lr_0 = 2.2882e-04
Loss = 5.6791e-03, PNorm = 182.1958, GNorm = 0.1376, lr_0 = 2.2866e-04
Loss = 5.9205e-03, PNorm = 182.2004, GNorm = 0.0827, lr_0 = 2.2851e-04
Loss = 5.4563e-03, PNorm = 182.2074, GNorm = 0.1082, lr_0 = 2.2835e-04
Loss = 9.7927e-03, PNorm = 182.2137, GNorm = 0.1946, lr_0 = 2.2819e-04
Loss = 5.0513e-03, PNorm = 182.2208, GNorm = 0.2260, lr_0 = 2.2804e-04
Loss = 6.0097e-03, PNorm = 182.2274, GNorm = 0.1842, lr_0 = 2.2788e-04
Loss = 9.3845e-03, PNorm = 182.2348, GNorm = 0.1779, lr_0 = 2.2773e-04
Loss = 5.8846e-03, PNorm = 182.2421, GNorm = 0.2296, lr_0 = 2.2757e-04
Validation mae = 0.121547
Epoch 20
Loss = 7.7295e-03, PNorm = 182.2492, GNorm = 0.1131, lr_0 = 2.2741e-04
Loss = 3.9797e-03, PNorm = 182.2534, GNorm = 0.1234, lr_0 = 2.2726e-04
Loss = 5.5255e-03, PNorm = 182.2579, GNorm = 0.1230, lr_0 = 2.2710e-04
Loss = 5.3124e-03, PNorm = 182.2638, GNorm = 0.1697, lr_0 = 2.2695e-04
Loss = 4.7362e-03, PNorm = 182.2697, GNorm = 0.0647, lr_0 = 2.2679e-04
Loss = 5.1489e-03, PNorm = 182.2743, GNorm = 0.1886, lr_0 = 2.2664e-04
Loss = 1.8787e-02, PNorm = 182.2785, GNorm = 0.2265, lr_0 = 2.2648e-04
Loss = 7.2288e-03, PNorm = 182.2794, GNorm = 0.1530, lr_0 = 2.2632e-04
Loss = 7.3264e-03, PNorm = 182.2819, GNorm = 0.1197, lr_0 = 2.2617e-04
Loss = 6.0091e-03, PNorm = 182.2855, GNorm = 0.2472, lr_0 = 2.2601e-04
Loss = 4.1619e-03, PNorm = 182.2912, GNorm = 0.1916, lr_0 = 2.2586e-04
Loss = 5.8217e-03, PNorm = 182.2959, GNorm = 0.1232, lr_0 = 2.2571e-04
Loss = 4.5965e-03, PNorm = 182.2989, GNorm = 0.1085, lr_0 = 2.2555e-04
Loss = 8.8102e-03, PNorm = 182.3037, GNorm = 0.0631, lr_0 = 2.2540e-04
Loss = 7.7504e-03, PNorm = 182.3096, GNorm = 0.3325, lr_0 = 2.2524e-04
Loss = 7.0076e-03, PNorm = 182.3144, GNorm = 0.2010, lr_0 = 2.2509e-04
Loss = 4.3057e-03, PNorm = 182.3189, GNorm = 0.2314, lr_0 = 2.2493e-04
Loss = 5.6302e-03, PNorm = 182.3220, GNorm = 0.1682, lr_0 = 2.2478e-04
Loss = 5.0130e-03, PNorm = 182.3283, GNorm = 0.1913, lr_0 = 2.2463e-04
Loss = 9.3913e-03, PNorm = 182.3340, GNorm = 0.0978, lr_0 = 2.2447e-04
Loss = 7.0675e-03, PNorm = 182.3388, GNorm = 0.2366, lr_0 = 2.2432e-04
Loss = 4.3903e-03, PNorm = 182.3451, GNorm = 0.2575, lr_0 = 2.2416e-04
Loss = 8.6516e-03, PNorm = 182.3496, GNorm = 0.2690, lr_0 = 2.2401e-04
Loss = 8.3553e-03, PNorm = 182.3521, GNorm = 0.3152, lr_0 = 2.2386e-04
Loss = 6.2487e-03, PNorm = 182.3566, GNorm = 0.1038, lr_0 = 2.2370e-04
Loss = 7.3928e-03, PNorm = 182.3631, GNorm = 0.5080, lr_0 = 2.2355e-04
Loss = 5.5078e-03, PNorm = 182.3691, GNorm = 0.1626, lr_0 = 2.2340e-04
Loss = 4.8495e-03, PNorm = 182.3741, GNorm = 0.2125, lr_0 = 2.2324e-04
Loss = 5.1654e-03, PNorm = 182.3815, GNorm = 0.2620, lr_0 = 2.2309e-04
Loss = 7.0729e-03, PNorm = 182.3859, GNorm = 0.1395, lr_0 = 2.2294e-04
Loss = 6.3083e-03, PNorm = 182.3897, GNorm = 0.1850, lr_0 = 2.2279e-04
Loss = 5.2093e-03, PNorm = 182.3941, GNorm = 0.2513, lr_0 = 2.2263e-04
Loss = 5.8067e-03, PNorm = 182.3975, GNorm = 0.1103, lr_0 = 2.2248e-04
Loss = 7.6702e-03, PNorm = 182.4025, GNorm = 0.2112, lr_0 = 2.2233e-04
Loss = 7.6728e-03, PNorm = 182.4090, GNorm = 0.1519, lr_0 = 2.2218e-04
Loss = 7.7056e-03, PNorm = 182.4145, GNorm = 0.1779, lr_0 = 2.2202e-04
Loss = 4.6514e-03, PNorm = 182.4193, GNorm = 0.2842, lr_0 = 2.2187e-04
Loss = 6.7475e-03, PNorm = 182.4229, GNorm = 0.1652, lr_0 = 2.2172e-04
Loss = 8.7403e-03, PNorm = 182.4269, GNorm = 0.1247, lr_0 = 2.2157e-04
Loss = 6.7502e-03, PNorm = 182.4323, GNorm = 0.3813, lr_0 = 2.2142e-04
Loss = 5.3538e-03, PNorm = 182.4387, GNorm = 0.2617, lr_0 = 2.2126e-04
Loss = 4.4060e-03, PNorm = 182.4441, GNorm = 0.1359, lr_0 = 2.2111e-04
Loss = 4.0754e-03, PNorm = 182.4500, GNorm = 0.1439, lr_0 = 2.2096e-04
Loss = 5.1241e-03, PNorm = 182.4558, GNorm = 0.0815, lr_0 = 2.2081e-04
Loss = 4.9086e-03, PNorm = 182.4606, GNorm = 0.1422, lr_0 = 2.2066e-04
Loss = 4.1830e-03, PNorm = 182.4654, GNorm = 0.1377, lr_0 = 2.2051e-04
Loss = 5.3456e-03, PNorm = 182.4717, GNorm = 0.0944, lr_0 = 2.2036e-04
Loss = 6.0033e-03, PNorm = 182.4767, GNorm = 0.4528, lr_0 = 2.2021e-04
Loss = 8.7533e-03, PNorm = 182.4819, GNorm = 0.1098, lr_0 = 2.2005e-04
Loss = 6.2279e-03, PNorm = 182.4880, GNorm = 0.1024, lr_0 = 2.1990e-04
Loss = 5.0381e-03, PNorm = 182.4925, GNorm = 0.1096, lr_0 = 2.1975e-04
Loss = 4.0024e-03, PNorm = 182.4972, GNorm = 0.1855, lr_0 = 2.1960e-04
Loss = 3.4692e-03, PNorm = 182.5033, GNorm = 0.1806, lr_0 = 2.1945e-04
Loss = 7.8128e-03, PNorm = 182.5072, GNorm = 0.1152, lr_0 = 2.1930e-04
Loss = 6.5872e-03, PNorm = 182.5108, GNorm = 0.1084, lr_0 = 2.1915e-04
Loss = 5.2377e-03, PNorm = 182.5145, GNorm = 0.1402, lr_0 = 2.1900e-04
Loss = 6.2705e-03, PNorm = 182.5174, GNorm = 0.2418, lr_0 = 2.1885e-04
Loss = 4.3598e-03, PNorm = 182.5217, GNorm = 0.1702, lr_0 = 2.1870e-04
Loss = 9.2321e-03, PNorm = 182.5292, GNorm = 0.1302, lr_0 = 2.1855e-04
Loss = 4.9896e-03, PNorm = 182.5340, GNorm = 0.1737, lr_0 = 2.1840e-04
Loss = 5.0746e-03, PNorm = 182.5373, GNorm = 0.1796, lr_0 = 2.1825e-04
Loss = 5.3224e-03, PNorm = 182.5424, GNorm = 0.1896, lr_0 = 2.1810e-04
Loss = 7.7324e-03, PNorm = 182.5477, GNorm = 0.0849, lr_0 = 2.1795e-04
Loss = 6.6414e-03, PNorm = 182.5520, GNorm = 0.0881, lr_0 = 2.1780e-04
Loss = 4.4966e-03, PNorm = 182.5582, GNorm = 0.1531, lr_0 = 2.1765e-04
Loss = 4.3041e-03, PNorm = 182.5636, GNorm = 0.2054, lr_0 = 2.1751e-04
Loss = 9.4989e-03, PNorm = 182.5704, GNorm = 0.6336, lr_0 = 2.1736e-04
Loss = 6.0189e-03, PNorm = 182.5773, GNorm = 0.1180, lr_0 = 2.1721e-04
Loss = 4.5506e-03, PNorm = 182.5844, GNorm = 0.1337, lr_0 = 2.1706e-04
Loss = 3.8919e-03, PNorm = 182.5912, GNorm = 0.0994, lr_0 = 2.1691e-04
Loss = 6.1358e-03, PNorm = 182.5956, GNorm = 0.0974, lr_0 = 2.1676e-04
Loss = 4.1229e-03, PNorm = 182.5998, GNorm = 0.3354, lr_0 = 2.1661e-04
Loss = 6.5003e-03, PNorm = 182.6058, GNorm = 0.1351, lr_0 = 2.1646e-04
Loss = 9.5379e-03, PNorm = 182.6103, GNorm = 0.0979, lr_0 = 2.1632e-04
Loss = 3.9479e-03, PNorm = 182.6157, GNorm = 0.1411, lr_0 = 2.1617e-04
Loss = 6.4628e-03, PNorm = 182.6207, GNorm = 0.1698, lr_0 = 2.1602e-04
Loss = 3.8829e-03, PNorm = 182.6266, GNorm = 0.0981, lr_0 = 2.1587e-04
Loss = 4.2963e-03, PNorm = 182.6326, GNorm = 0.2088, lr_0 = 2.1572e-04
Loss = 4.6991e-03, PNorm = 182.6386, GNorm = 0.1454, lr_0 = 2.1558e-04
Loss = 9.1126e-03, PNorm = 182.6449, GNorm = 0.2626, lr_0 = 2.1543e-04
Loss = 4.9688e-03, PNorm = 182.6503, GNorm = 0.1194, lr_0 = 2.1528e-04
Loss = 1.4108e-02, PNorm = 182.6573, GNorm = 0.1974, lr_0 = 2.1513e-04
Loss = 7.1756e-03, PNorm = 182.6617, GNorm = 0.1356, lr_0 = 2.1499e-04
Loss = 9.7139e-03, PNorm = 182.6672, GNorm = 1.2108, lr_0 = 2.1484e-04
Loss = 5.7748e-03, PNorm = 182.6718, GNorm = 0.1660, lr_0 = 2.1469e-04
Loss = 4.9703e-03, PNorm = 182.6785, GNorm = 0.0826, lr_0 = 2.1454e-04
Loss = 5.3454e-03, PNorm = 182.6836, GNorm = 0.0806, lr_0 = 2.1440e-04
Loss = 4.3405e-03, PNorm = 182.6888, GNorm = 0.1227, lr_0 = 2.1425e-04
Loss = 8.6864e-03, PNorm = 182.6956, GNorm = 0.3013, lr_0 = 2.1410e-04
Loss = 1.1170e-02, PNorm = 182.6984, GNorm = 0.0781, lr_0 = 2.1396e-04
Loss = 6.2977e-03, PNorm = 182.7005, GNorm = 0.0996, lr_0 = 2.1381e-04
Loss = 1.0430e-02, PNorm = 182.7023, GNorm = 1.0569, lr_0 = 2.1366e-04
Loss = 6.9230e-03, PNorm = 182.7069, GNorm = 0.1226, lr_0 = 2.1352e-04
Loss = 4.7793e-03, PNorm = 182.7135, GNorm = 0.1546, lr_0 = 2.1337e-04
Loss = 8.4141e-03, PNorm = 182.7190, GNorm = 0.1017, lr_0 = 2.1323e-04
Loss = 4.9922e-03, PNorm = 182.7240, GNorm = 0.2573, lr_0 = 2.1308e-04
Loss = 9.7561e-03, PNorm = 182.7281, GNorm = 0.3199, lr_0 = 2.1293e-04
Loss = 5.0636e-03, PNorm = 182.7343, GNorm = 0.2319, lr_0 = 2.1279e-04
Loss = 8.4826e-03, PNorm = 182.7427, GNorm = 0.2058, lr_0 = 2.1264e-04
Loss = 6.8819e-03, PNorm = 182.7472, GNorm = 0.1905, lr_0 = 2.1250e-04
Loss = 4.2238e-03, PNorm = 182.7529, GNorm = 0.1590, lr_0 = 2.1235e-04
Loss = 4.7111e-03, PNorm = 182.7607, GNorm = 0.3127, lr_0 = 2.1221e-04
Loss = 4.8966e-03, PNorm = 182.7665, GNorm = 0.2182, lr_0 = 2.1206e-04
Loss = 7.1729e-03, PNorm = 182.7727, GNorm = 0.4339, lr_0 = 2.1191e-04
Loss = 7.5003e-03, PNorm = 182.7785, GNorm = 0.0987, lr_0 = 2.1177e-04
Loss = 7.5933e-03, PNorm = 182.7852, GNorm = 0.1367, lr_0 = 2.1162e-04
Loss = 4.3026e-03, PNorm = 182.7914, GNorm = 0.1555, lr_0 = 2.1148e-04
Loss = 5.0495e-03, PNorm = 182.7959, GNorm = 0.2413, lr_0 = 2.1133e-04
Loss = 4.0879e-03, PNorm = 182.8015, GNorm = 0.1866, lr_0 = 2.1119e-04
Loss = 7.1327e-03, PNorm = 182.8051, GNorm = 0.1075, lr_0 = 2.1104e-04
Loss = 4.4456e-03, PNorm = 182.8113, GNorm = 0.2708, lr_0 = 2.1090e-04
Loss = 4.2138e-03, PNorm = 182.8183, GNorm = 0.1737, lr_0 = 2.1076e-04
Loss = 3.9925e-03, PNorm = 182.8240, GNorm = 0.1442, lr_0 = 2.1061e-04
Loss = 8.4607e-03, PNorm = 182.8288, GNorm = 0.0960, lr_0 = 2.1047e-04
Loss = 5.4248e-03, PNorm = 182.8357, GNorm = 0.0955, lr_0 = 2.1032e-04
Loss = 8.9471e-03, PNorm = 182.8427, GNorm = 0.1173, lr_0 = 2.1018e-04
Loss = 4.7559e-03, PNorm = 182.8501, GNorm = 0.1221, lr_0 = 2.1003e-04
Loss = 8.8529e-03, PNorm = 182.8589, GNorm = 0.1804, lr_0 = 2.0989e-04
Loss = 7.4923e-03, PNorm = 182.8633, GNorm = 0.9934, lr_0 = 2.0975e-04
Loss = 1.1569e-02, PNorm = 182.8663, GNorm = 0.3570, lr_0 = 2.0960e-04
Validation mae = 0.121214
Epoch 21
Loss = 5.0534e-03, PNorm = 182.8717, GNorm = 0.2237, lr_0 = 2.0946e-04
Loss = 4.2660e-03, PNorm = 182.8740, GNorm = 0.2664, lr_0 = 2.0932e-04
Loss = 3.9255e-03, PNorm = 182.8778, GNorm = 0.1134, lr_0 = 2.0917e-04
Loss = 4.2201e-03, PNorm = 182.8821, GNorm = 0.0958, lr_0 = 2.0903e-04
Loss = 7.2672e-03, PNorm = 182.8870, GNorm = 0.0936, lr_0 = 2.0889e-04
Loss = 5.8540e-03, PNorm = 182.8919, GNorm = 0.1939, lr_0 = 2.0874e-04
Loss = 3.5706e-03, PNorm = 182.8965, GNorm = 0.0994, lr_0 = 2.0860e-04
Loss = 5.4830e-03, PNorm = 182.9010, GNorm = 0.3010, lr_0 = 2.0846e-04
Loss = 7.4825e-03, PNorm = 182.9052, GNorm = 0.2012, lr_0 = 2.0831e-04
Loss = 6.1082e-03, PNorm = 182.9077, GNorm = 0.1202, lr_0 = 2.0817e-04
Loss = 3.3398e-03, PNorm = 182.9111, GNorm = 0.2107, lr_0 = 2.0803e-04
Loss = 6.3385e-03, PNorm = 182.9161, GNorm = 0.1710, lr_0 = 2.0789e-04
Loss = 5.3609e-03, PNorm = 182.9197, GNorm = 0.1460, lr_0 = 2.0774e-04
Loss = 7.5139e-03, PNorm = 182.9251, GNorm = 0.1372, lr_0 = 2.0760e-04
Loss = 1.2965e-02, PNorm = 182.9282, GNorm = 0.5729, lr_0 = 2.0746e-04
Loss = 4.6016e-03, PNorm = 182.9329, GNorm = 0.1755, lr_0 = 2.0732e-04
Loss = 9.0008e-03, PNorm = 182.9361, GNorm = 0.2014, lr_0 = 2.0718e-04
Loss = 1.0926e-02, PNorm = 182.9406, GNorm = 0.0827, lr_0 = 2.0703e-04
Loss = 3.3207e-03, PNorm = 182.9469, GNorm = 0.1792, lr_0 = 2.0689e-04
Loss = 5.7580e-03, PNorm = 182.9508, GNorm = 0.1815, lr_0 = 2.0675e-04
Loss = 3.3725e-03, PNorm = 182.9552, GNorm = 0.1374, lr_0 = 2.0661e-04
Loss = 9.3442e-03, PNorm = 182.9592, GNorm = 0.2950, lr_0 = 2.0647e-04
Loss = 3.5641e-03, PNorm = 182.9620, GNorm = 0.2304, lr_0 = 2.0633e-04
Loss = 4.5228e-03, PNorm = 182.9656, GNorm = 0.1048, lr_0 = 2.0618e-04
Loss = 4.2631e-03, PNorm = 182.9694, GNorm = 0.0887, lr_0 = 2.0604e-04
Loss = 5.7912e-03, PNorm = 182.9728, GNorm = 0.2524, lr_0 = 2.0590e-04
Loss = 3.9906e-03, PNorm = 182.9762, GNorm = 0.1813, lr_0 = 2.0576e-04
Loss = 3.6869e-03, PNorm = 182.9805, GNorm = 0.1641, lr_0 = 2.0562e-04
Loss = 6.1092e-03, PNorm = 182.9854, GNorm = 0.2183, lr_0 = 2.0548e-04
Loss = 6.5035e-03, PNorm = 182.9917, GNorm = 0.1564, lr_0 = 2.0534e-04
Loss = 8.0837e-03, PNorm = 182.9970, GNorm = 0.3250, lr_0 = 2.0520e-04
Loss = 5.2854e-03, PNorm = 183.0013, GNorm = 0.0687, lr_0 = 2.0506e-04
Loss = 7.0059e-03, PNorm = 183.0073, GNorm = 0.0786, lr_0 = 2.0492e-04
Loss = 5.0095e-03, PNorm = 183.0121, GNorm = 0.3675, lr_0 = 2.0478e-04
Loss = 3.2915e-03, PNorm = 183.0161, GNorm = 0.1848, lr_0 = 2.0464e-04
Loss = 3.4707e-03, PNorm = 183.0202, GNorm = 0.1779, lr_0 = 2.0450e-04
Loss = 7.0281e-03, PNorm = 183.0239, GNorm = 0.0584, lr_0 = 2.0436e-04
Loss = 6.3942e-03, PNorm = 183.0274, GNorm = 0.1721, lr_0 = 2.0422e-04
Loss = 6.4861e-03, PNorm = 183.0330, GNorm = 0.2684, lr_0 = 2.0408e-04
Loss = 5.8214e-03, PNorm = 183.0390, GNorm = 0.2755, lr_0 = 2.0394e-04
Loss = 4.7820e-03, PNorm = 183.0415, GNorm = 0.1687, lr_0 = 2.0380e-04
Loss = 4.4959e-03, PNorm = 183.0455, GNorm = 0.1991, lr_0 = 2.0366e-04
Loss = 3.5170e-03, PNorm = 183.0485, GNorm = 0.2049, lr_0 = 2.0352e-04
Loss = 4.8821e-03, PNorm = 183.0552, GNorm = 0.1366, lr_0 = 2.0338e-04
Loss = 3.3521e-03, PNorm = 183.0606, GNorm = 0.0845, lr_0 = 2.0324e-04
Loss = 5.1809e-03, PNorm = 183.0647, GNorm = 0.1271, lr_0 = 2.0310e-04
Loss = 4.6409e-03, PNorm = 183.0692, GNorm = 0.0918, lr_0 = 2.0296e-04
Loss = 3.8065e-03, PNorm = 183.0739, GNorm = 0.1757, lr_0 = 2.0282e-04
Loss = 3.3087e-03, PNorm = 183.0795, GNorm = 0.1255, lr_0 = 2.0268e-04
Loss = 3.1858e-03, PNorm = 183.0837, GNorm = 0.2255, lr_0 = 2.0254e-04
Loss = 3.6684e-03, PNorm = 183.0877, GNorm = 0.1516, lr_0 = 2.0240e-04
Loss = 7.0315e-03, PNorm = 183.0907, GNorm = 0.1111, lr_0 = 2.0227e-04
Loss = 5.1258e-03, PNorm = 183.0944, GNorm = 0.2292, lr_0 = 2.0213e-04
Loss = 4.4583e-03, PNorm = 183.1007, GNorm = 0.1874, lr_0 = 2.0199e-04
Loss = 9.9910e-03, PNorm = 183.1050, GNorm = 0.1214, lr_0 = 2.0185e-04
Loss = 5.2037e-03, PNorm = 183.1109, GNorm = 0.1274, lr_0 = 2.0171e-04
Loss = 6.1086e-03, PNorm = 183.1175, GNorm = 0.1362, lr_0 = 2.0157e-04
Loss = 6.8353e-03, PNorm = 183.1206, GNorm = 0.1079, lr_0 = 2.0144e-04
Loss = 6.2886e-03, PNorm = 183.1232, GNorm = 0.2349, lr_0 = 2.0130e-04
Loss = 1.6700e-02, PNorm = 183.1271, GNorm = 0.1331, lr_0 = 2.0116e-04
Loss = 6.9350e-03, PNorm = 183.1300, GNorm = 0.4083, lr_0 = 2.0102e-04
Loss = 5.5011e-03, PNorm = 183.1328, GNorm = 0.0709, lr_0 = 2.0088e-04
Loss = 5.8601e-03, PNorm = 183.1371, GNorm = 0.1631, lr_0 = 2.0075e-04
Loss = 4.3312e-03, PNorm = 183.1393, GNorm = 0.3571, lr_0 = 2.0061e-04
Loss = 7.3619e-03, PNorm = 183.1410, GNorm = 0.1244, lr_0 = 2.0047e-04
Loss = 5.7634e-03, PNorm = 183.1445, GNorm = 0.1304, lr_0 = 2.0033e-04
Loss = 8.1378e-03, PNorm = 183.1496, GNorm = 0.2319, lr_0 = 2.0020e-04
Loss = 8.5683e-03, PNorm = 183.1549, GNorm = 0.1649, lr_0 = 2.0006e-04
Loss = 4.8096e-03, PNorm = 183.1611, GNorm = 0.0893, lr_0 = 1.9992e-04
Loss = 4.0977e-03, PNorm = 183.1656, GNorm = 0.2157, lr_0 = 1.9979e-04
Loss = 4.8736e-03, PNorm = 183.1710, GNorm = 0.6745, lr_0 = 1.9965e-04
Loss = 3.8109e-03, PNorm = 183.1775, GNorm = 0.0800, lr_0 = 1.9951e-04
Loss = 6.0733e-03, PNorm = 183.1806, GNorm = 0.1966, lr_0 = 1.9938e-04
Loss = 3.3397e-03, PNorm = 183.1833, GNorm = 0.1976, lr_0 = 1.9924e-04
Loss = 5.3251e-03, PNorm = 183.1877, GNorm = 0.1208, lr_0 = 1.9910e-04
Loss = 6.3894e-03, PNorm = 183.1929, GNorm = 0.2093, lr_0 = 1.9897e-04
Loss = 8.8713e-03, PNorm = 183.1958, GNorm = 0.1265, lr_0 = 1.9883e-04
Loss = 3.1211e-03, PNorm = 183.2007, GNorm = 0.1063, lr_0 = 1.9869e-04
Loss = 4.7851e-03, PNorm = 183.2064, GNorm = 0.1294, lr_0 = 1.9856e-04
Loss = 3.8473e-03, PNorm = 183.2120, GNorm = 0.2522, lr_0 = 1.9842e-04
Loss = 3.9784e-03, PNorm = 183.2176, GNorm = 0.1056, lr_0 = 1.9829e-04
Loss = 5.0600e-03, PNorm = 183.2233, GNorm = 0.2174, lr_0 = 1.9815e-04
Loss = 3.5254e-03, PNorm = 183.2277, GNorm = 0.0770, lr_0 = 1.9801e-04
Loss = 6.7510e-03, PNorm = 183.2308, GNorm = 0.1751, lr_0 = 1.9788e-04
Loss = 5.5691e-03, PNorm = 183.2346, GNorm = 0.1178, lr_0 = 1.9774e-04
Loss = 5.8554e-03, PNorm = 183.2382, GNorm = 0.2789, lr_0 = 1.9761e-04
Loss = 4.2310e-03, PNorm = 183.2418, GNorm = 0.1281, lr_0 = 1.9747e-04
Loss = 6.3693e-03, PNorm = 183.2463, GNorm = 0.0905, lr_0 = 1.9734e-04
Loss = 6.0801e-03, PNorm = 183.2530, GNorm = 0.1129, lr_0 = 1.9720e-04
Loss = 7.3198e-03, PNorm = 183.2575, GNorm = 0.1794, lr_0 = 1.9707e-04
Loss = 5.3921e-03, PNorm = 183.2642, GNorm = 0.1712, lr_0 = 1.9693e-04
Loss = 3.9470e-03, PNorm = 183.2693, GNorm = 0.1104, lr_0 = 1.9680e-04
Loss = 3.8346e-03, PNorm = 183.2734, GNorm = 0.2011, lr_0 = 1.9666e-04
Loss = 3.5636e-03, PNorm = 183.2777, GNorm = 0.1410, lr_0 = 1.9653e-04
Loss = 8.5658e-03, PNorm = 183.2837, GNorm = 0.1110, lr_0 = 1.9639e-04
Loss = 5.1900e-03, PNorm = 183.2881, GNorm = 0.1008, lr_0 = 1.9626e-04
Loss = 4.0784e-03, PNorm = 183.2913, GNorm = 0.3174, lr_0 = 1.9612e-04
Loss = 6.2171e-03, PNorm = 183.2969, GNorm = 0.2305, lr_0 = 1.9599e-04
Loss = 4.6003e-03, PNorm = 183.3026, GNorm = 0.2387, lr_0 = 1.9585e-04
Loss = 7.5013e-03, PNorm = 183.3072, GNorm = 0.0720, lr_0 = 1.9572e-04
Loss = 6.3559e-03, PNorm = 183.3122, GNorm = 0.1135, lr_0 = 1.9559e-04
Loss = 6.8838e-03, PNorm = 183.3184, GNorm = 0.1388, lr_0 = 1.9545e-04
Loss = 5.3906e-03, PNorm = 183.3234, GNorm = 0.1034, lr_0 = 1.9532e-04
Loss = 5.5601e-03, PNorm = 183.3281, GNorm = 0.1870, lr_0 = 1.9518e-04
Loss = 4.2768e-03, PNorm = 183.3318, GNorm = 0.1605, lr_0 = 1.9505e-04
Loss = 5.4598e-03, PNorm = 183.3361, GNorm = 0.1389, lr_0 = 1.9492e-04
Loss = 1.1001e-02, PNorm = 183.3410, GNorm = 0.1816, lr_0 = 1.9478e-04
Loss = 8.1879e-03, PNorm = 183.3431, GNorm = 0.2179, lr_0 = 1.9465e-04
Loss = 3.8640e-03, PNorm = 183.3487, GNorm = 0.0860, lr_0 = 1.9452e-04
Loss = 4.9613e-03, PNorm = 183.3542, GNorm = 0.2170, lr_0 = 1.9438e-04
Loss = 5.4867e-03, PNorm = 183.3582, GNorm = 0.2036, lr_0 = 1.9425e-04
Loss = 8.0409e-03, PNorm = 183.3641, GNorm = 0.1494, lr_0 = 1.9412e-04
Loss = 5.4373e-03, PNorm = 183.3686, GNorm = 0.1233, lr_0 = 1.9398e-04
Loss = 3.6022e-03, PNorm = 183.3730, GNorm = 0.1174, lr_0 = 1.9385e-04
Loss = 4.9711e-03, PNorm = 183.3769, GNorm = 0.1410, lr_0 = 1.9372e-04
Loss = 7.1506e-03, PNorm = 183.3806, GNorm = 0.0804, lr_0 = 1.9359e-04
Loss = 3.4315e-03, PNorm = 183.3856, GNorm = 0.0965, lr_0 = 1.9345e-04
Loss = 4.3492e-03, PNorm = 183.3900, GNorm = 0.3220, lr_0 = 1.9332e-04
Loss = 9.2288e-03, PNorm = 183.3963, GNorm = 1.9679, lr_0 = 1.9319e-04
Loss = 8.8674e-03, PNorm = 183.4000, GNorm = 0.2517, lr_0 = 1.9306e-04
Validation mae = 0.121123
Epoch 22
Loss = 5.8898e-03, PNorm = 183.4068, GNorm = 0.1651, lr_0 = 1.9292e-04
Loss = 3.6810e-03, PNorm = 183.4132, GNorm = 0.2883, lr_0 = 1.9279e-04
Loss = 4.8795e-03, PNorm = 183.4163, GNorm = 0.0911, lr_0 = 1.9266e-04
Loss = 4.6049e-03, PNorm = 183.4210, GNorm = 0.3354, lr_0 = 1.9253e-04
Loss = 3.0815e-03, PNorm = 183.4229, GNorm = 0.0983, lr_0 = 1.9240e-04
Loss = 3.4580e-03, PNorm = 183.4263, GNorm = 0.1751, lr_0 = 1.9226e-04
Loss = 4.8036e-03, PNorm = 183.4295, GNorm = 0.2497, lr_0 = 1.9213e-04
Loss = 4.1461e-03, PNorm = 183.4340, GNorm = 0.2084, lr_0 = 1.9200e-04
Loss = 4.0949e-03, PNorm = 183.4376, GNorm = 0.0783, lr_0 = 1.9187e-04
Loss = 3.8134e-03, PNorm = 183.4428, GNorm = 0.1136, lr_0 = 1.9174e-04
Loss = 3.5238e-03, PNorm = 183.4467, GNorm = 0.2279, lr_0 = 1.9161e-04
Loss = 8.2209e-03, PNorm = 183.4486, GNorm = 0.1989, lr_0 = 1.9148e-04
Loss = 2.7436e-03, PNorm = 183.4529, GNorm = 0.2183, lr_0 = 1.9134e-04
Loss = 4.4756e-03, PNorm = 183.4567, GNorm = 0.2667, lr_0 = 1.9121e-04
Loss = 6.4277e-03, PNorm = 183.4598, GNorm = 0.0757, lr_0 = 1.9108e-04
Loss = 3.5994e-03, PNorm = 183.4633, GNorm = 0.0582, lr_0 = 1.9095e-04
Loss = 5.6532e-03, PNorm = 183.4676, GNorm = 0.1166, lr_0 = 1.9082e-04
Loss = 4.7313e-03, PNorm = 183.4712, GNorm = 0.3569, lr_0 = 1.9069e-04
Loss = 3.1461e-03, PNorm = 183.4750, GNorm = 0.1749, lr_0 = 1.9056e-04
Loss = 4.9165e-03, PNorm = 183.4793, GNorm = 0.0822, lr_0 = 1.9043e-04
Loss = 5.5682e-03, PNorm = 183.4831, GNorm = 0.2126, lr_0 = 1.9030e-04
Loss = 3.0906e-03, PNorm = 183.4864, GNorm = 0.1499, lr_0 = 1.9017e-04
Loss = 3.5806e-03, PNorm = 183.4893, GNorm = 0.2654, lr_0 = 1.9004e-04
Loss = 4.6419e-03, PNorm = 183.4925, GNorm = 0.1029, lr_0 = 1.8991e-04
Loss = 5.7013e-03, PNorm = 183.4952, GNorm = 0.1733, lr_0 = 1.8978e-04
Loss = 6.9120e-03, PNorm = 183.4967, GNorm = 0.1203, lr_0 = 1.8965e-04
Loss = 4.8350e-03, PNorm = 183.4989, GNorm = 0.1881, lr_0 = 1.8952e-04
Loss = 6.1528e-03, PNorm = 183.5026, GNorm = 0.1010, lr_0 = 1.8939e-04
Loss = 3.8061e-03, PNorm = 183.5062, GNorm = 0.0581, lr_0 = 1.8926e-04
Loss = 4.2465e-03, PNorm = 183.5095, GNorm = 0.1655, lr_0 = 1.8913e-04
Loss = 5.1485e-03, PNorm = 183.5129, GNorm = 0.2201, lr_0 = 1.8900e-04
Loss = 4.1409e-03, PNorm = 183.5157, GNorm = 0.1137, lr_0 = 1.8887e-04
Loss = 4.4263e-03, PNorm = 183.5204, GNorm = 0.1820, lr_0 = 1.8874e-04
Loss = 3.0896e-03, PNorm = 183.5233, GNorm = 0.1285, lr_0 = 1.8861e-04
Loss = 4.6028e-03, PNorm = 183.5255, GNorm = 0.0928, lr_0 = 1.8848e-04
Loss = 2.9755e-03, PNorm = 183.5291, GNorm = 0.1262, lr_0 = 1.8835e-04
Loss = 5.9371e-03, PNorm = 183.5299, GNorm = 0.1101, lr_0 = 1.8822e-04
Loss = 6.5314e-03, PNorm = 183.5356, GNorm = 0.0698, lr_0 = 1.8809e-04
Loss = 5.3587e-03, PNorm = 183.5415, GNorm = 0.1288, lr_0 = 1.8797e-04
Loss = 4.9613e-03, PNorm = 183.5471, GNorm = 0.1322, lr_0 = 1.8784e-04
Loss = 3.7669e-03, PNorm = 183.5513, GNorm = 0.1124, lr_0 = 1.8771e-04
Loss = 2.7551e-03, PNorm = 183.5539, GNorm = 0.1227, lr_0 = 1.8758e-04
Loss = 3.0993e-03, PNorm = 183.5568, GNorm = 0.2826, lr_0 = 1.8745e-04
Loss = 3.7681e-03, PNorm = 183.5581, GNorm = 0.4146, lr_0 = 1.8732e-04
Loss = 4.9566e-03, PNorm = 183.5616, GNorm = 0.5511, lr_0 = 1.8719e-04
Loss = 2.9146e-03, PNorm = 183.5701, GNorm = 0.1733, lr_0 = 1.8707e-04
Loss = 6.7009e-03, PNorm = 183.5763, GNorm = 0.1906, lr_0 = 1.8694e-04
Loss = 3.6268e-03, PNorm = 183.5822, GNorm = 0.2358, lr_0 = 1.8681e-04
Loss = 8.9304e-03, PNorm = 183.5870, GNorm = 0.0960, lr_0 = 1.8668e-04
Loss = 9.5182e-03, PNorm = 183.5908, GNorm = 0.0813, lr_0 = 1.8655e-04
Loss = 4.5574e-03, PNorm = 183.5938, GNorm = 0.3210, lr_0 = 1.8643e-04
Loss = 8.4575e-03, PNorm = 183.5966, GNorm = 0.9094, lr_0 = 1.8630e-04
Loss = 1.1628e-02, PNorm = 183.5997, GNorm = 0.9297, lr_0 = 1.8617e-04
Loss = 3.8342e-03, PNorm = 183.6003, GNorm = 0.2719, lr_0 = 1.8604e-04
Loss = 3.9137e-03, PNorm = 183.6057, GNorm = 0.1547, lr_0 = 1.8592e-04
Loss = 2.4045e-03, PNorm = 183.6093, GNorm = 0.1527, lr_0 = 1.8579e-04
Loss = 4.5296e-03, PNorm = 183.6141, GNorm = 0.1457, lr_0 = 1.8566e-04
Loss = 5.1146e-03, PNorm = 183.6199, GNorm = 0.1541, lr_0 = 1.8553e-04
Loss = 6.7163e-03, PNorm = 183.6247, GNorm = 0.1359, lr_0 = 1.8541e-04
Loss = 8.1327e-03, PNorm = 183.6312, GNorm = 0.1204, lr_0 = 1.8528e-04
Loss = 7.1694e-03, PNorm = 183.6362, GNorm = 0.2356, lr_0 = 1.8515e-04
Loss = 8.1755e-03, PNorm = 183.6424, GNorm = 0.1297, lr_0 = 1.8503e-04
Loss = 7.5130e-03, PNorm = 183.6467, GNorm = 0.2585, lr_0 = 1.8490e-04
Loss = 3.7464e-03, PNorm = 183.6498, GNorm = 0.1537, lr_0 = 1.8477e-04
Loss = 4.4866e-03, PNorm = 183.6518, GNorm = 0.1004, lr_0 = 1.8465e-04
Loss = 3.5022e-03, PNorm = 183.6548, GNorm = 0.1021, lr_0 = 1.8452e-04
Loss = 4.2754e-03, PNorm = 183.6572, GNorm = 0.1714, lr_0 = 1.8439e-04
Loss = 3.4890e-03, PNorm = 183.6606, GNorm = 0.1343, lr_0 = 1.8427e-04
Loss = 2.6755e-03, PNorm = 183.6634, GNorm = 0.0971, lr_0 = 1.8414e-04
Loss = 3.4300e-03, PNorm = 183.6653, GNorm = 0.1233, lr_0 = 1.8401e-04
Loss = 5.7460e-03, PNorm = 183.6673, GNorm = 0.2326, lr_0 = 1.8389e-04
Loss = 2.8913e-03, PNorm = 183.6717, GNorm = 0.1716, lr_0 = 1.8376e-04
Loss = 6.1032e-03, PNorm = 183.6773, GNorm = 0.2936, lr_0 = 1.8364e-04
Loss = 3.9228e-03, PNorm = 183.6829, GNorm = 0.1961, lr_0 = 1.8351e-04
Loss = 3.8940e-03, PNorm = 183.6865, GNorm = 0.2727, lr_0 = 1.8338e-04
Loss = 3.1521e-03, PNorm = 183.6881, GNorm = 0.2394, lr_0 = 1.8326e-04
Loss = 7.0081e-03, PNorm = 183.6902, GNorm = 0.1115, lr_0 = 1.8313e-04
Loss = 2.6431e-03, PNorm = 183.6936, GNorm = 0.0607, lr_0 = 1.8301e-04
Loss = 5.4805e-03, PNorm = 183.6970, GNorm = 0.2697, lr_0 = 1.8288e-04
Loss = 3.4628e-03, PNorm = 183.7001, GNorm = 0.0967, lr_0 = 1.8276e-04
Loss = 5.8321e-03, PNorm = 183.7037, GNorm = 0.0798, lr_0 = 1.8263e-04
Loss = 2.8768e-03, PNorm = 183.7079, GNorm = 0.0861, lr_0 = 1.8251e-04
Loss = 5.2748e-03, PNorm = 183.7105, GNorm = 0.1214, lr_0 = 1.8238e-04
Loss = 4.1620e-03, PNorm = 183.7126, GNorm = 0.1485, lr_0 = 1.8226e-04
Loss = 3.8675e-03, PNorm = 183.7181, GNorm = 0.0920, lr_0 = 1.8213e-04
Loss = 5.6866e-03, PNorm = 183.7217, GNorm = 0.3065, lr_0 = 1.8201e-04
Loss = 4.1232e-03, PNorm = 183.7242, GNorm = 0.2147, lr_0 = 1.8188e-04
Loss = 3.2438e-03, PNorm = 183.7294, GNorm = 0.1276, lr_0 = 1.8176e-04
Loss = 4.3852e-03, PNorm = 183.7339, GNorm = 0.0835, lr_0 = 1.8163e-04
Loss = 5.6230e-03, PNorm = 183.7371, GNorm = 0.1421, lr_0 = 1.8151e-04
Loss = 6.1738e-03, PNorm = 183.7417, GNorm = 0.1036, lr_0 = 1.8138e-04
Loss = 3.3348e-03, PNorm = 183.7450, GNorm = 0.1901, lr_0 = 1.8126e-04
Loss = 5.0647e-03, PNorm = 183.7486, GNorm = 0.0841, lr_0 = 1.8114e-04
Loss = 5.7943e-03, PNorm = 183.7518, GNorm = 0.1062, lr_0 = 1.8101e-04
Loss = 7.3639e-03, PNorm = 183.7557, GNorm = 0.2879, lr_0 = 1.8089e-04
Loss = 6.2376e-03, PNorm = 183.7581, GNorm = 0.0906, lr_0 = 1.8076e-04
Loss = 7.6463e-03, PNorm = 183.7609, GNorm = 0.4024, lr_0 = 1.8064e-04
Loss = 3.7020e-03, PNorm = 183.7648, GNorm = 0.1810, lr_0 = 1.8052e-04
Loss = 7.4861e-03, PNorm = 183.7692, GNorm = 0.1436, lr_0 = 1.8039e-04
Loss = 4.4947e-03, PNorm = 183.7720, GNorm = 0.1400, lr_0 = 1.8027e-04
Loss = 5.9382e-03, PNorm = 183.7742, GNorm = 0.1198, lr_0 = 1.8015e-04
Loss = 5.7910e-03, PNorm = 183.7761, GNorm = 0.8087, lr_0 = 1.8002e-04
Loss = 2.8504e-03, PNorm = 183.7768, GNorm = 0.1121, lr_0 = 1.7990e-04
Loss = 3.9465e-03, PNorm = 183.7799, GNorm = 0.1335, lr_0 = 1.7978e-04
Loss = 4.2470e-03, PNorm = 183.7849, GNorm = 0.1096, lr_0 = 1.7965e-04
Loss = 9.1999e-03, PNorm = 183.7913, GNorm = 0.7035, lr_0 = 1.7953e-04
Loss = 3.6571e-03, PNorm = 183.7942, GNorm = 0.2706, lr_0 = 1.7941e-04
Loss = 8.7366e-03, PNorm = 183.7978, GNorm = 0.1516, lr_0 = 1.7928e-04
Loss = 3.3872e-03, PNorm = 183.8020, GNorm = 0.0879, lr_0 = 1.7916e-04
Loss = 5.4564e-03, PNorm = 183.8050, GNorm = 0.1158, lr_0 = 1.7904e-04
Loss = 1.1115e-02, PNorm = 183.8087, GNorm = 0.0971, lr_0 = 1.7892e-04
Loss = 1.1644e-02, PNorm = 183.8121, GNorm = 0.3063, lr_0 = 1.7879e-04
Loss = 3.5987e-03, PNorm = 183.8163, GNorm = 0.1211, lr_0 = 1.7867e-04
Loss = 5.6371e-03, PNorm = 183.8211, GNorm = 0.2072, lr_0 = 1.7855e-04
Loss = 1.7977e-02, PNorm = 183.8284, GNorm = 0.3301, lr_0 = 1.7843e-04
Loss = 3.8935e-03, PNorm = 183.8320, GNorm = 0.1517, lr_0 = 1.7830e-04
Loss = 3.0691e-03, PNorm = 183.8365, GNorm = 0.1453, lr_0 = 1.7818e-04
Loss = 1.0221e-02, PNorm = 183.8412, GNorm = 0.1597, lr_0 = 1.7806e-04
Loss = 4.6979e-03, PNorm = 183.8446, GNorm = 0.0732, lr_0 = 1.7794e-04
Loss = 5.6880e-03, PNorm = 183.8457, GNorm = 0.2728, lr_0 = 1.7782e-04
Validation mae = 0.121012
Epoch 23
Loss = 4.0523e-03, PNorm = 183.8492, GNorm = 0.0919, lr_0 = 1.7769e-04
Loss = 3.9425e-03, PNorm = 183.8514, GNorm = 0.0681, lr_0 = 1.7757e-04
Loss = 4.6816e-03, PNorm = 183.8548, GNorm = 0.1537, lr_0 = 1.7745e-04
Loss = 3.3609e-03, PNorm = 183.8588, GNorm = 0.1525, lr_0 = 1.7733e-04
Loss = 3.0127e-03, PNorm = 183.8622, GNorm = 0.3781, lr_0 = 1.7721e-04
Loss = 2.4693e-03, PNorm = 183.8663, GNorm = 0.1296, lr_0 = 1.7709e-04
Loss = 3.6740e-03, PNorm = 183.8694, GNorm = 0.1141, lr_0 = 1.7696e-04
Loss = 5.4143e-03, PNorm = 183.8727, GNorm = 0.3331, lr_0 = 1.7684e-04
Loss = 3.0711e-03, PNorm = 183.8749, GNorm = 0.1110, lr_0 = 1.7672e-04
Loss = 5.7289e-03, PNorm = 183.8767, GNorm = 0.4197, lr_0 = 1.7660e-04
Loss = 7.4727e-03, PNorm = 183.8790, GNorm = 0.8876, lr_0 = 1.7648e-04
Loss = 3.8198e-03, PNorm = 183.8801, GNorm = 0.1901, lr_0 = 1.7636e-04
Loss = 5.9318e-03, PNorm = 183.8832, GNorm = 0.1408, lr_0 = 1.7624e-04
Loss = 3.6751e-03, PNorm = 183.8859, GNorm = 0.0577, lr_0 = 1.7612e-04
Loss = 3.4090e-03, PNorm = 183.8890, GNorm = 0.0922, lr_0 = 1.7600e-04
Loss = 5.4831e-03, PNorm = 183.8921, GNorm = 0.0966, lr_0 = 1.7588e-04
Loss = 3.6738e-03, PNorm = 183.8951, GNorm = 0.0702, lr_0 = 1.7576e-04
Loss = 3.0032e-03, PNorm = 183.8980, GNorm = 0.1932, lr_0 = 1.7564e-04
Loss = 1.1707e-02, PNorm = 183.9020, GNorm = 0.3498, lr_0 = 1.7552e-04
Loss = 3.6205e-03, PNorm = 183.9053, GNorm = 0.0952, lr_0 = 1.7540e-04
Loss = 5.5134e-03, PNorm = 183.9098, GNorm = 0.3086, lr_0 = 1.7528e-04
Loss = 7.0843e-03, PNorm = 183.9131, GNorm = 0.0976, lr_0 = 1.7516e-04
Loss = 5.0908e-03, PNorm = 183.9155, GNorm = 0.2922, lr_0 = 1.7504e-04
Loss = 5.8529e-03, PNorm = 183.9185, GNorm = 0.4596, lr_0 = 1.7492e-04
Loss = 3.8171e-03, PNorm = 183.9230, GNorm = 0.1126, lr_0 = 1.7480e-04
Loss = 5.4388e-03, PNorm = 183.9254, GNorm = 0.3944, lr_0 = 1.7468e-04
Loss = 2.5118e-03, PNorm = 183.9287, GNorm = 0.0979, lr_0 = 1.7456e-04
Loss = 4.6392e-03, PNorm = 183.9346, GNorm = 0.1733, lr_0 = 1.7444e-04
Loss = 3.3194e-03, PNorm = 183.9382, GNorm = 0.0909, lr_0 = 1.7432e-04
Loss = 2.7472e-03, PNorm = 183.9398, GNorm = 0.1631, lr_0 = 1.7420e-04
Loss = 3.5162e-03, PNorm = 183.9411, GNorm = 0.1155, lr_0 = 1.7408e-04
Loss = 5.6343e-03, PNorm = 183.9432, GNorm = 0.6370, lr_0 = 1.7396e-04
Loss = 5.7488e-03, PNorm = 183.9450, GNorm = 0.2210, lr_0 = 1.7384e-04
Loss = 8.3657e-03, PNorm = 183.9487, GNorm = 0.1884, lr_0 = 1.7372e-04
Loss = 2.7156e-03, PNorm = 183.9523, GNorm = 0.0839, lr_0 = 1.7360e-04
Loss = 6.6484e-03, PNorm = 183.9531, GNorm = 0.0911, lr_0 = 1.7348e-04
Loss = 2.8166e-03, PNorm = 183.9544, GNorm = 0.1630, lr_0 = 1.7336e-04
Loss = 4.4795e-03, PNorm = 183.9567, GNorm = 0.2438, lr_0 = 1.7325e-04
Loss = 5.0939e-03, PNorm = 183.9576, GNorm = 0.2492, lr_0 = 1.7313e-04
Loss = 7.8784e-03, PNorm = 183.9601, GNorm = 0.1269, lr_0 = 1.7301e-04
Loss = 2.6623e-03, PNorm = 183.9644, GNorm = 0.1329, lr_0 = 1.7289e-04
Loss = 2.0981e-03, PNorm = 183.9696, GNorm = 0.1112, lr_0 = 1.7277e-04
Loss = 3.1301e-03, PNorm = 183.9729, GNorm = 0.1124, lr_0 = 1.7265e-04
Loss = 4.3298e-03, PNorm = 183.9762, GNorm = 0.1048, lr_0 = 1.7253e-04
Loss = 4.8702e-03, PNorm = 183.9816, GNorm = 0.0758, lr_0 = 1.7242e-04
Loss = 3.8971e-03, PNorm = 183.9863, GNorm = 0.1413, lr_0 = 1.7230e-04
Loss = 7.9321e-03, PNorm = 183.9903, GNorm = 0.1474, lr_0 = 1.7218e-04
Loss = 4.3666e-03, PNorm = 183.9927, GNorm = 0.1299, lr_0 = 1.7206e-04
Loss = 5.3017e-03, PNorm = 183.9962, GNorm = 0.1681, lr_0 = 1.7194e-04
Loss = 5.1252e-03, PNorm = 184.0002, GNorm = 0.1930, lr_0 = 1.7183e-04
Loss = 7.0184e-03, PNorm = 184.0027, GNorm = 0.0816, lr_0 = 1.7171e-04
Loss = 5.4611e-03, PNorm = 184.0067, GNorm = 0.1269, lr_0 = 1.7159e-04
Loss = 2.7874e-03, PNorm = 184.0106, GNorm = 0.1843, lr_0 = 1.7147e-04
Loss = 4.4471e-03, PNorm = 184.0152, GNorm = 0.0998, lr_0 = 1.7136e-04
Loss = 5.0294e-03, PNorm = 184.0189, GNorm = 0.1636, lr_0 = 1.7124e-04
Loss = 2.7253e-03, PNorm = 184.0219, GNorm = 0.0638, lr_0 = 1.7112e-04
Loss = 3.4445e-03, PNorm = 184.0248, GNorm = 0.3721, lr_0 = 1.7100e-04
Loss = 7.1479e-03, PNorm = 184.0276, GNorm = 0.2719, lr_0 = 1.7089e-04
Loss = 5.4767e-03, PNorm = 184.0300, GNorm = 0.2088, lr_0 = 1.7077e-04
Loss = 2.3428e-03, PNorm = 184.0333, GNorm = 0.2207, lr_0 = 1.7065e-04
Loss = 3.7017e-03, PNorm = 184.0371, GNorm = 0.1579, lr_0 = 1.7054e-04
Loss = 1.1024e-02, PNorm = 184.0413, GNorm = 0.0610, lr_0 = 1.7042e-04
Loss = 4.5806e-03, PNorm = 184.0420, GNorm = 0.1608, lr_0 = 1.7030e-04
Loss = 7.6076e-03, PNorm = 184.0444, GNorm = 1.1179, lr_0 = 1.7019e-04
Loss = 7.7016e-03, PNorm = 184.0461, GNorm = 0.1551, lr_0 = 1.7007e-04
Loss = 3.1476e-03, PNorm = 184.0498, GNorm = 0.1058, lr_0 = 1.6995e-04
Loss = 7.1121e-03, PNorm = 184.0547, GNorm = 0.2638, lr_0 = 1.6984e-04
Loss = 1.4642e-02, PNorm = 184.0566, GNorm = 0.2003, lr_0 = 1.6972e-04
Loss = 5.1329e-03, PNorm = 184.0597, GNorm = 0.1581, lr_0 = 1.6960e-04
Loss = 6.3541e-03, PNorm = 184.0628, GNorm = 0.1392, lr_0 = 1.6949e-04
Loss = 2.6446e-03, PNorm = 184.0654, GNorm = 0.2059, lr_0 = 1.6937e-04
Loss = 7.1055e-03, PNorm = 184.0673, GNorm = 0.1670, lr_0 = 1.6926e-04
Loss = 2.2501e-03, PNorm = 184.0696, GNorm = 0.1315, lr_0 = 1.6914e-04
Loss = 6.7210e-03, PNorm = 184.0733, GNorm = 0.1128, lr_0 = 1.6902e-04
Loss = 2.6313e-03, PNorm = 184.0773, GNorm = 0.1180, lr_0 = 1.6891e-04
Loss = 9.3514e-03, PNorm = 184.0787, GNorm = 0.0859, lr_0 = 1.6879e-04
Loss = 3.7177e-03, PNorm = 184.0827, GNorm = 0.1343, lr_0 = 1.6868e-04
Loss = 9.1326e-03, PNorm = 184.0859, GNorm = 0.3045, lr_0 = 1.6856e-04
Loss = 4.1088e-03, PNorm = 184.0897, GNorm = 0.0903, lr_0 = 1.6845e-04
Loss = 4.6964e-03, PNorm = 184.0958, GNorm = 0.1418, lr_0 = 1.6833e-04
Loss = 2.2533e-03, PNorm = 184.1011, GNorm = 0.1853, lr_0 = 1.6821e-04
Loss = 3.3785e-03, PNorm = 184.1045, GNorm = 0.1002, lr_0 = 1.6810e-04
Loss = 3.5425e-03, PNorm = 184.1085, GNorm = 0.2189, lr_0 = 1.6798e-04
Loss = 5.4561e-03, PNorm = 184.1127, GNorm = 0.1333, lr_0 = 1.6787e-04
Loss = 3.0046e-03, PNorm = 184.1157, GNorm = 0.1807, lr_0 = 1.6775e-04
Loss = 2.7747e-03, PNorm = 184.1181, GNorm = 0.2490, lr_0 = 1.6764e-04
Loss = 2.2046e-03, PNorm = 184.1216, GNorm = 0.0698, lr_0 = 1.6752e-04
Loss = 2.4152e-03, PNorm = 184.1241, GNorm = 0.1561, lr_0 = 1.6741e-04
Loss = 6.7654e-03, PNorm = 184.1258, GNorm = 0.1364, lr_0 = 1.6729e-04
Loss = 2.7653e-03, PNorm = 184.1313, GNorm = 0.1315, lr_0 = 1.6718e-04
Loss = 1.0055e-02, PNorm = 184.1360, GNorm = 0.1022, lr_0 = 1.6707e-04
Loss = 3.7588e-03, PNorm = 184.1400, GNorm = 0.0849, lr_0 = 1.6695e-04
Loss = 3.0572e-03, PNorm = 184.1425, GNorm = 0.2307, lr_0 = 1.6684e-04
Loss = 2.1467e-03, PNorm = 184.1462, GNorm = 0.1604, lr_0 = 1.6672e-04
Loss = 2.4075e-03, PNorm = 184.1488, GNorm = 0.1198, lr_0 = 1.6661e-04
Loss = 3.7298e-03, PNorm = 184.1512, GNorm = 0.2649, lr_0 = 1.6649e-04
Loss = 5.4639e-03, PNorm = 184.1548, GNorm = 0.1511, lr_0 = 1.6638e-04
Loss = 3.6275e-03, PNorm = 184.1574, GNorm = 0.1010, lr_0 = 1.6627e-04
Loss = 3.3764e-03, PNorm = 184.1597, GNorm = 0.2703, lr_0 = 1.6615e-04
Loss = 7.0358e-03, PNorm = 184.1620, GNorm = 0.2243, lr_0 = 1.6604e-04
Loss = 6.4084e-03, PNorm = 184.1638, GNorm = 0.8913, lr_0 = 1.6592e-04
Loss = 2.4228e-03, PNorm = 184.1686, GNorm = 0.1277, lr_0 = 1.6581e-04
Loss = 3.4655e-03, PNorm = 184.1722, GNorm = 0.1342, lr_0 = 1.6570e-04
Loss = 4.7235e-03, PNorm = 184.1758, GNorm = 0.1868, lr_0 = 1.6558e-04
Loss = 3.2811e-03, PNorm = 184.1797, GNorm = 0.1503, lr_0 = 1.6547e-04
Loss = 3.6330e-03, PNorm = 184.1837, GNorm = 0.2008, lr_0 = 1.6536e-04
Loss = 5.2249e-03, PNorm = 184.1861, GNorm = 0.1781, lr_0 = 1.6524e-04
Loss = 2.7804e-03, PNorm = 184.1895, GNorm = 0.1016, lr_0 = 1.6513e-04
Loss = 3.0148e-03, PNorm = 184.1936, GNorm = 0.1130, lr_0 = 1.6502e-04
Loss = 4.3233e-03, PNorm = 184.1964, GNorm = 0.2189, lr_0 = 1.6490e-04
Loss = 2.3629e-03, PNorm = 184.1985, GNorm = 0.0510, lr_0 = 1.6479e-04
Loss = 2.3055e-03, PNorm = 184.2011, GNorm = 0.1244, lr_0 = 1.6468e-04
Loss = 7.1163e-03, PNorm = 184.2038, GNorm = 0.2409, lr_0 = 1.6457e-04
Loss = 6.2622e-03, PNorm = 184.2077, GNorm = 0.1352, lr_0 = 1.6445e-04
Loss = 3.1379e-03, PNorm = 184.2099, GNorm = 0.1103, lr_0 = 1.6434e-04
Loss = 2.6347e-03, PNorm = 184.2126, GNorm = 0.0698, lr_0 = 1.6423e-04
Loss = 8.5275e-03, PNorm = 184.2161, GNorm = 0.0881, lr_0 = 1.6412e-04
Loss = 2.7441e-03, PNorm = 184.2173, GNorm = 0.1457, lr_0 = 1.6400e-04
Loss = 4.6068e-03, PNorm = 184.2210, GNorm = 0.1026, lr_0 = 1.6389e-04
Loss = 3.7988e-03, PNorm = 184.2254, GNorm = 0.0590, lr_0 = 1.6378e-04
Validation mae = 0.121263
Epoch 24
Loss = 2.5663e-03, PNorm = 184.2288, GNorm = 0.1415, lr_0 = 1.6367e-04
Loss = 2.3950e-03, PNorm = 184.2311, GNorm = 0.0517, lr_0 = 1.6355e-04
Loss = 5.5293e-03, PNorm = 184.2349, GNorm = 0.1007, lr_0 = 1.6344e-04
Loss = 2.0965e-03, PNorm = 184.2386, GNorm = 0.1971, lr_0 = 1.6333e-04
Loss = 4.2967e-03, PNorm = 184.2409, GNorm = 0.1518, lr_0 = 1.6322e-04
Loss = 5.4867e-03, PNorm = 184.2422, GNorm = 0.1259, lr_0 = 1.6311e-04
Loss = 2.2389e-03, PNorm = 184.2448, GNorm = 0.0952, lr_0 = 1.6299e-04
Loss = 5.0333e-03, PNorm = 184.2472, GNorm = 0.0595, lr_0 = 1.6288e-04
Loss = 2.4388e-03, PNorm = 184.2485, GNorm = 0.0786, lr_0 = 1.6277e-04
Loss = 2.4729e-03, PNorm = 184.2513, GNorm = 0.0842, lr_0 = 1.6266e-04
Loss = 4.0490e-03, PNorm = 184.2530, GNorm = 0.1580, lr_0 = 1.6255e-04
Loss = 9.9492e-03, PNorm = 184.2549, GNorm = 0.1647, lr_0 = 1.6244e-04
Loss = 2.4829e-03, PNorm = 184.2578, GNorm = 0.1078, lr_0 = 1.6233e-04
Loss = 3.4850e-03, PNorm = 184.2593, GNorm = 0.0680, lr_0 = 1.6221e-04
Loss = 2.0311e-03, PNorm = 184.2620, GNorm = 0.1730, lr_0 = 1.6210e-04
Loss = 4.5897e-03, PNorm = 184.2653, GNorm = 0.0585, lr_0 = 1.6199e-04
Loss = 3.2787e-03, PNorm = 184.2689, GNorm = 0.2040, lr_0 = 1.6188e-04
Loss = 2.6176e-03, PNorm = 184.2716, GNorm = 0.1958, lr_0 = 1.6177e-04
Loss = 6.9870e-03, PNorm = 184.2742, GNorm = 0.0840, lr_0 = 1.6166e-04
Loss = 2.3587e-03, PNorm = 184.2775, GNorm = 0.2019, lr_0 = 1.6155e-04
Loss = 2.7461e-03, PNorm = 184.2798, GNorm = 0.2277, lr_0 = 1.6144e-04
Loss = 2.0717e-03, PNorm = 184.2814, GNorm = 0.1246, lr_0 = 1.6133e-04
Loss = 3.9211e-03, PNorm = 184.2847, GNorm = 0.1652, lr_0 = 1.6122e-04
Loss = 4.8469e-03, PNorm = 184.2868, GNorm = 0.2166, lr_0 = 1.6111e-04
Loss = 6.3696e-03, PNorm = 184.2885, GNorm = 0.1424, lr_0 = 1.6100e-04
Loss = 2.5751e-03, PNorm = 184.2913, GNorm = 0.2708, lr_0 = 1.6089e-04
Loss = 4.5409e-03, PNorm = 184.2938, GNorm = 0.0639, lr_0 = 1.6078e-04
Loss = 5.9110e-03, PNorm = 184.2969, GNorm = 0.0658, lr_0 = 1.6067e-04
Loss = 4.1999e-03, PNorm = 184.2995, GNorm = 0.0687, lr_0 = 1.6056e-04
Loss = 3.0165e-03, PNorm = 184.3023, GNorm = 0.2499, lr_0 = 1.6045e-04
Loss = 2.8660e-03, PNorm = 184.3047, GNorm = 0.1627, lr_0 = 1.6034e-04
Loss = 5.8983e-03, PNorm = 184.3066, GNorm = 0.0653, lr_0 = 1.6023e-04
Loss = 3.5411e-03, PNorm = 184.3098, GNorm = 0.0885, lr_0 = 1.6012e-04
Loss = 2.4015e-03, PNorm = 184.3114, GNorm = 0.0900, lr_0 = 1.6001e-04
Loss = 2.3460e-03, PNorm = 184.3154, GNorm = 0.1006, lr_0 = 1.5990e-04
Loss = 3.4473e-03, PNorm = 184.3187, GNorm = 0.0692, lr_0 = 1.5979e-04
Loss = 1.8255e-03, PNorm = 184.3226, GNorm = 0.2411, lr_0 = 1.5968e-04
Loss = 2.2848e-03, PNorm = 184.3239, GNorm = 0.1056, lr_0 = 1.5957e-04
Loss = 3.3661e-03, PNorm = 184.3263, GNorm = 0.1029, lr_0 = 1.5946e-04
Loss = 2.2620e-03, PNorm = 184.3277, GNorm = 0.1146, lr_0 = 1.5935e-04
Loss = 5.0683e-03, PNorm = 184.3288, GNorm = 0.6172, lr_0 = 1.5924e-04
Loss = 7.4059e-03, PNorm = 184.3306, GNorm = 0.1235, lr_0 = 1.5913e-04
Loss = 2.7932e-03, PNorm = 184.3340, GNorm = 0.1769, lr_0 = 1.5902e-04
Loss = 2.4462e-03, PNorm = 184.3373, GNorm = 0.1472, lr_0 = 1.5891e-04
Loss = 4.9356e-03, PNorm = 184.3399, GNorm = 0.8707, lr_0 = 1.5880e-04
Loss = 3.2599e-03, PNorm = 184.3441, GNorm = 0.0671, lr_0 = 1.5870e-04
Loss = 2.0819e-03, PNorm = 184.3473, GNorm = 0.0757, lr_0 = 1.5859e-04
Loss = 2.5123e-03, PNorm = 184.3501, GNorm = 0.3325, lr_0 = 1.5848e-04
Loss = 5.9143e-03, PNorm = 184.3535, GNorm = 0.1231, lr_0 = 1.5837e-04
Loss = 4.7958e-03, PNorm = 184.3557, GNorm = 0.1209, lr_0 = 1.5826e-04
Loss = 2.0254e-03, PNorm = 184.3573, GNorm = 0.2078, lr_0 = 1.5815e-04
Loss = 3.6089e-03, PNorm = 184.3588, GNorm = 0.0948, lr_0 = 1.5804e-04
Loss = 8.2499e-03, PNorm = 184.3597, GNorm = 0.1208, lr_0 = 1.5794e-04
Loss = 4.0915e-03, PNorm = 184.3621, GNorm = 0.1374, lr_0 = 1.5783e-04
Loss = 2.7615e-03, PNorm = 184.3634, GNorm = 0.1355, lr_0 = 1.5772e-04
Loss = 4.8501e-03, PNorm = 184.3657, GNorm = 0.1163, lr_0 = 1.5761e-04
Loss = 7.7933e-03, PNorm = 184.3677, GNorm = 0.1042, lr_0 = 1.5750e-04
Loss = 2.8627e-03, PNorm = 184.3704, GNorm = 0.1488, lr_0 = 1.5740e-04
Loss = 2.1130e-03, PNorm = 184.3726, GNorm = 0.1874, lr_0 = 1.5729e-04
Loss = 4.9192e-03, PNorm = 184.3749, GNorm = 0.2649, lr_0 = 1.5718e-04
Loss = 2.3997e-03, PNorm = 184.3786, GNorm = 0.1733, lr_0 = 1.5707e-04
Loss = 5.2445e-03, PNorm = 184.3827, GNorm = 0.2792, lr_0 = 1.5697e-04
Loss = 1.9846e-03, PNorm = 184.3853, GNorm = 0.1107, lr_0 = 1.5686e-04
Loss = 5.2792e-03, PNorm = 184.3860, GNorm = 0.1098, lr_0 = 1.5675e-04
Loss = 3.7499e-03, PNorm = 184.3876, GNorm = 0.1647, lr_0 = 1.5664e-04
Loss = 6.0658e-03, PNorm = 184.3898, GNorm = 0.1129, lr_0 = 1.5654e-04
Loss = 6.1684e-03, PNorm = 184.3925, GNorm = 0.1335, lr_0 = 1.5643e-04
Loss = 3.7398e-03, PNorm = 184.3960, GNorm = 0.1654, lr_0 = 1.5632e-04
Loss = 3.4417e-03, PNorm = 184.3996, GNorm = 0.1240, lr_0 = 1.5621e-04
Loss = 9.8123e-03, PNorm = 184.4030, GNorm = 0.2792, lr_0 = 1.5611e-04
Loss = 3.5752e-03, PNorm = 184.4069, GNorm = 0.2005, lr_0 = 1.5600e-04
Loss = 2.4477e-03, PNorm = 184.4106, GNorm = 0.1616, lr_0 = 1.5589e-04
Loss = 3.0606e-03, PNorm = 184.4140, GNorm = 0.1252, lr_0 = 1.5579e-04
Loss = 7.7913e-03, PNorm = 184.4171, GNorm = 0.1275, lr_0 = 1.5568e-04
Loss = 4.1643e-03, PNorm = 184.4184, GNorm = 0.0925, lr_0 = 1.5557e-04
Loss = 2.7513e-03, PNorm = 184.4216, GNorm = 0.2059, lr_0 = 1.5547e-04
Loss = 2.3562e-03, PNorm = 184.4260, GNorm = 0.0925, lr_0 = 1.5536e-04
Loss = 2.7861e-03, PNorm = 184.4293, GNorm = 0.1467, lr_0 = 1.5525e-04
Loss = 2.2087e-03, PNorm = 184.4333, GNorm = 0.0886, lr_0 = 1.5515e-04
Loss = 5.0472e-03, PNorm = 184.4362, GNorm = 0.1469, lr_0 = 1.5504e-04
Loss = 2.4066e-03, PNorm = 184.4394, GNorm = 0.0691, lr_0 = 1.5493e-04
Loss = 7.0006e-03, PNorm = 184.4432, GNorm = 0.1771, lr_0 = 1.5483e-04
Loss = 3.0844e-03, PNorm = 184.4438, GNorm = 0.1099, lr_0 = 1.5472e-04
Loss = 3.9162e-03, PNorm = 184.4461, GNorm = 0.1582, lr_0 = 1.5462e-04
Loss = 2.4178e-03, PNorm = 184.4495, GNorm = 0.0666, lr_0 = 1.5451e-04
Loss = 7.0784e-03, PNorm = 184.4521, GNorm = 0.2237, lr_0 = 1.5440e-04
Loss = 3.2564e-03, PNorm = 184.4552, GNorm = 0.0829, lr_0 = 1.5430e-04
Loss = 2.1982e-03, PNorm = 184.4589, GNorm = 0.1150, lr_0 = 1.5419e-04
Loss = 1.9292e-03, PNorm = 184.4616, GNorm = 0.0704, lr_0 = 1.5409e-04
Loss = 7.0519e-03, PNorm = 184.4636, GNorm = 0.1043, lr_0 = 1.5398e-04
Loss = 2.7582e-03, PNorm = 184.4653, GNorm = 0.2020, lr_0 = 1.5388e-04
Loss = 1.7176e-03, PNorm = 184.4675, GNorm = 0.0564, lr_0 = 1.5377e-04
Loss = 2.2039e-03, PNorm = 184.4692, GNorm = 0.1441, lr_0 = 1.5367e-04
Loss = 2.5208e-03, PNorm = 184.4719, GNorm = 0.1769, lr_0 = 1.5356e-04
Loss = 4.1450e-03, PNorm = 184.4752, GNorm = 0.1171, lr_0 = 1.5346e-04
Loss = 1.7606e-03, PNorm = 184.4782, GNorm = 0.0806, lr_0 = 1.5335e-04
Loss = 5.6312e-03, PNorm = 184.4814, GNorm = 0.2467, lr_0 = 1.5325e-04
Loss = 3.7005e-03, PNorm = 184.4838, GNorm = 0.1982, lr_0 = 1.5314e-04
Loss = 3.6719e-03, PNorm = 184.4843, GNorm = 0.4076, lr_0 = 1.5304e-04
Loss = 3.1667e-03, PNorm = 184.4874, GNorm = 0.0982, lr_0 = 1.5293e-04
Loss = 2.9356e-03, PNorm = 184.4902, GNorm = 0.1721, lr_0 = 1.5283e-04
Loss = 3.4722e-03, PNorm = 184.4934, GNorm = 0.0935, lr_0 = 1.5272e-04
Loss = 3.5317e-03, PNorm = 184.4971, GNorm = 0.0610, lr_0 = 1.5262e-04
Loss = 2.9607e-03, PNorm = 184.5004, GNorm = 0.0751, lr_0 = 1.5251e-04
Loss = 1.1368e-02, PNorm = 184.5014, GNorm = 0.2241, lr_0 = 1.5241e-04
Loss = 5.2618e-03, PNorm = 184.5030, GNorm = 0.1972, lr_0 = 1.5230e-04
Loss = 4.2899e-03, PNorm = 184.5075, GNorm = 0.1572, lr_0 = 1.5220e-04
Loss = 6.3626e-03, PNorm = 184.5103, GNorm = 0.1090, lr_0 = 1.5209e-04
Loss = 6.6011e-03, PNorm = 184.5150, GNorm = 0.1126, lr_0 = 1.5199e-04
Loss = 7.5123e-03, PNorm = 184.5184, GNorm = 0.1256, lr_0 = 1.5189e-04
Loss = 3.0936e-03, PNorm = 184.5207, GNorm = 0.1842, lr_0 = 1.5178e-04
Loss = 5.0712e-03, PNorm = 184.5246, GNorm = 0.2582, lr_0 = 1.5168e-04
Loss = 5.5925e-03, PNorm = 184.5285, GNorm = 0.0520, lr_0 = 1.5157e-04
Loss = 1.8325e-02, PNorm = 184.5327, GNorm = 0.3484, lr_0 = 1.5147e-04
Loss = 5.1577e-03, PNorm = 184.5342, GNorm = 0.4129, lr_0 = 1.5137e-04
Loss = 5.8669e-03, PNorm = 184.5380, GNorm = 0.1557, lr_0 = 1.5126e-04
Loss = 4.5459e-03, PNorm = 184.5416, GNorm = 0.0798, lr_0 = 1.5116e-04
Loss = 1.9539e-03, PNorm = 184.5441, GNorm = 0.1087, lr_0 = 1.5106e-04
Loss = 3.4944e-03, PNorm = 184.5461, GNorm = 0.0706, lr_0 = 1.5095e-04
Loss = 5.5017e-03, PNorm = 184.5480, GNorm = 0.0959, lr_0 = 1.5085e-04
Validation mae = 0.121163
Epoch 25
Loss = 3.5622e-03, PNorm = 184.5494, GNorm = 0.1294, lr_0 = 1.5075e-04
Loss = 2.7460e-03, PNorm = 184.5517, GNorm = 0.0734, lr_0 = 1.5064e-04
Loss = 2.1159e-03, PNorm = 184.5541, GNorm = 0.0810, lr_0 = 1.5054e-04
Loss = 2.8702e-03, PNorm = 184.5569, GNorm = 0.0829, lr_0 = 1.5044e-04
Loss = 5.2109e-03, PNorm = 184.5605, GNorm = 0.1818, lr_0 = 1.5033e-04
Loss = 1.6011e-03, PNorm = 184.5633, GNorm = 0.0883, lr_0 = 1.5023e-04
Loss = 5.2608e-03, PNorm = 184.5656, GNorm = 0.2877, lr_0 = 1.5013e-04
Loss = 2.2341e-03, PNorm = 184.5677, GNorm = 0.0964, lr_0 = 1.5002e-04
Loss = 1.6695e-03, PNorm = 184.5713, GNorm = 0.0534, lr_0 = 1.4992e-04
Loss = 4.1154e-03, PNorm = 184.5730, GNorm = 0.0945, lr_0 = 1.4982e-04
Loss = 3.2868e-03, PNorm = 184.5735, GNorm = 0.1523, lr_0 = 1.4972e-04
Loss = 2.3838e-03, PNorm = 184.5751, GNorm = 0.1430, lr_0 = 1.4961e-04
Loss = 4.4603e-03, PNorm = 184.5775, GNorm = 0.1301, lr_0 = 1.4951e-04
Loss = 2.5713e-03, PNorm = 184.5795, GNorm = 0.1614, lr_0 = 1.4941e-04
Loss = 4.6677e-03, PNorm = 184.5812, GNorm = 0.1073, lr_0 = 1.4931e-04
Loss = 2.4853e-03, PNorm = 184.5840, GNorm = 0.0850, lr_0 = 1.4920e-04
Loss = 4.3031e-03, PNorm = 184.5862, GNorm = 0.1650, lr_0 = 1.4910e-04
Loss = 3.3834e-03, PNorm = 184.5882, GNorm = 0.0779, lr_0 = 1.4900e-04
Loss = 7.3344e-03, PNorm = 184.5891, GNorm = 0.1504, lr_0 = 1.4890e-04
Loss = 1.6936e-03, PNorm = 184.5928, GNorm = 0.1941, lr_0 = 1.4880e-04
Loss = 2.0318e-03, PNorm = 184.5945, GNorm = 0.1229, lr_0 = 1.4869e-04
Loss = 3.8846e-03, PNorm = 184.5973, GNorm = 0.1512, lr_0 = 1.4859e-04
Loss = 4.0225e-03, PNorm = 184.5998, GNorm = 0.3050, lr_0 = 1.4849e-04
Loss = 2.7585e-03, PNorm = 184.6026, GNorm = 0.1002, lr_0 = 1.4839e-04
Loss = 2.9835e-03, PNorm = 184.6039, GNorm = 0.1279, lr_0 = 1.4829e-04
Loss = 2.1697e-03, PNorm = 184.6055, GNorm = 0.1055, lr_0 = 1.4818e-04
Loss = 2.5457e-03, PNorm = 184.6071, GNorm = 0.1003, lr_0 = 1.4808e-04
Loss = 2.7675e-03, PNorm = 184.6087, GNorm = 0.5473, lr_0 = 1.4798e-04
Loss = 3.3171e-03, PNorm = 184.6107, GNorm = 0.1172, lr_0 = 1.4788e-04
Loss = 2.8841e-03, PNorm = 184.6130, GNorm = 0.2175, lr_0 = 1.4778e-04
Loss = 3.9896e-03, PNorm = 184.6156, GNorm = 0.2059, lr_0 = 1.4768e-04
Loss = 2.3741e-03, PNorm = 184.6174, GNorm = 0.2870, lr_0 = 1.4758e-04
Loss = 3.4562e-03, PNorm = 184.6196, GNorm = 0.0873, lr_0 = 1.4748e-04
Loss = 3.1878e-03, PNorm = 184.6216, GNorm = 0.1021, lr_0 = 1.4737e-04
Loss = 8.8270e-03, PNorm = 184.6249, GNorm = 0.2299, lr_0 = 1.4727e-04
Loss = 1.7181e-03, PNorm = 184.6282, GNorm = 0.0608, lr_0 = 1.4717e-04
Loss = 2.8341e-03, PNorm = 184.6310, GNorm = 0.1197, lr_0 = 1.4707e-04
Loss = 3.4598e-03, PNorm = 184.6335, GNorm = 0.1222, lr_0 = 1.4697e-04
Loss = 4.1202e-03, PNorm = 184.6350, GNorm = 0.0662, lr_0 = 1.4687e-04
Loss = 3.4258e-03, PNorm = 184.6377, GNorm = 0.1122, lr_0 = 1.4677e-04
Loss = 4.5128e-03, PNorm = 184.6394, GNorm = 0.1267, lr_0 = 1.4667e-04
Loss = 5.3400e-03, PNorm = 184.6413, GNorm = 0.2304, lr_0 = 1.4657e-04
Loss = 5.6443e-03, PNorm = 184.6454, GNorm = 0.0494, lr_0 = 1.4647e-04
Loss = 1.5200e-02, PNorm = 184.6475, GNorm = 2.7490, lr_0 = 1.4637e-04
Loss = 2.8919e-03, PNorm = 184.6486, GNorm = 0.2319, lr_0 = 1.4627e-04
Loss = 3.8935e-03, PNorm = 184.6485, GNorm = 0.0684, lr_0 = 1.4617e-04
Loss = 3.4075e-03, PNorm = 184.6517, GNorm = 0.1545, lr_0 = 1.4607e-04
Loss = 4.1353e-03, PNorm = 184.6543, GNorm = 0.0618, lr_0 = 1.4597e-04
Loss = 3.4493e-03, PNorm = 184.6562, GNorm = 0.2249, lr_0 = 1.4587e-04
Loss = 3.8903e-03, PNorm = 184.6572, GNorm = 0.1515, lr_0 = 1.4577e-04
Loss = 5.2600e-03, PNorm = 184.6580, GNorm = 0.2502, lr_0 = 1.4567e-04
Loss = 5.7068e-03, PNorm = 184.6586, GNorm = 0.3225, lr_0 = 1.4557e-04
Loss = 1.9960e-03, PNorm = 184.6602, GNorm = 0.2073, lr_0 = 1.4547e-04
Loss = 3.7820e-03, PNorm = 184.6625, GNorm = 0.1249, lr_0 = 1.4537e-04
Loss = 2.6376e-03, PNorm = 184.6656, GNorm = 0.0969, lr_0 = 1.4527e-04
Loss = 3.0543e-03, PNorm = 184.6692, GNorm = 0.0541, lr_0 = 1.4517e-04
Loss = 2.2001e-03, PNorm = 184.6714, GNorm = 0.1914, lr_0 = 1.4507e-04
Loss = 1.6715e-03, PNorm = 184.6729, GNorm = 0.1763, lr_0 = 1.4497e-04
Loss = 4.4701e-03, PNorm = 184.6763, GNorm = 0.0452, lr_0 = 1.4487e-04
Loss = 4.0031e-03, PNorm = 184.6775, GNorm = 0.0489, lr_0 = 1.4477e-04
Loss = 2.3566e-03, PNorm = 184.6803, GNorm = 0.1831, lr_0 = 1.4467e-04
Loss = 2.0760e-03, PNorm = 184.6833, GNorm = 0.0836, lr_0 = 1.4457e-04
Loss = 2.0627e-03, PNorm = 184.6854, GNorm = 0.1255, lr_0 = 1.4447e-04
Loss = 1.5618e-03, PNorm = 184.6869, GNorm = 0.1808, lr_0 = 1.4438e-04
Loss = 3.0361e-03, PNorm = 184.6903, GNorm = 0.2481, lr_0 = 1.4428e-04
Loss = 6.1606e-03, PNorm = 184.6919, GNorm = 0.1771, lr_0 = 1.4418e-04
Loss = 3.1261e-03, PNorm = 184.6941, GNorm = 0.1183, lr_0 = 1.4408e-04
Loss = 6.1241e-03, PNorm = 184.6963, GNorm = 0.2117, lr_0 = 1.4398e-04
Loss = 2.3981e-03, PNorm = 184.6990, GNorm = 0.0740, lr_0 = 1.4388e-04
Loss = 5.2574e-03, PNorm = 184.7023, GNorm = 0.3621, lr_0 = 1.4378e-04
Loss = 3.8640e-03, PNorm = 184.7038, GNorm = 0.1221, lr_0 = 1.4368e-04
Loss = 2.9124e-03, PNorm = 184.7053, GNorm = 0.0766, lr_0 = 1.4359e-04
Loss = 3.5220e-03, PNorm = 184.7062, GNorm = 0.8067, lr_0 = 1.4349e-04
Loss = 2.7805e-03, PNorm = 184.7071, GNorm = 0.0842, lr_0 = 1.4339e-04
Loss = 6.5422e-03, PNorm = 184.7107, GNorm = 0.1846, lr_0 = 1.4329e-04
Loss = 2.4307e-03, PNorm = 184.7148, GNorm = 0.0580, lr_0 = 1.4319e-04
Loss = 3.6204e-03, PNorm = 184.7183, GNorm = 0.1308, lr_0 = 1.4310e-04
Loss = 7.9621e-03, PNorm = 184.7190, GNorm = 0.1649, lr_0 = 1.4300e-04
Loss = 6.3739e-03, PNorm = 184.7212, GNorm = 0.3006, lr_0 = 1.4290e-04
Loss = 2.8877e-03, PNorm = 184.7261, GNorm = 0.1204, lr_0 = 1.4280e-04
Loss = 5.6916e-03, PNorm = 184.7272, GNorm = 0.0591, lr_0 = 1.4270e-04
Loss = 5.6074e-03, PNorm = 184.7295, GNorm = 0.0878, lr_0 = 1.4261e-04
Loss = 7.2220e-03, PNorm = 184.7330, GNorm = 0.1369, lr_0 = 1.4251e-04
Loss = 7.1121e-03, PNorm = 184.7344, GNorm = 0.0731, lr_0 = 1.4241e-04
Loss = 2.9186e-03, PNorm = 184.7374, GNorm = 0.1535, lr_0 = 1.4231e-04
Loss = 3.2905e-03, PNorm = 184.7397, GNorm = 0.5639, lr_0 = 1.4222e-04
Loss = 3.4031e-03, PNorm = 184.7416, GNorm = 0.1481, lr_0 = 1.4212e-04
Loss = 6.9136e-03, PNorm = 184.7440, GNorm = 0.1453, lr_0 = 1.4202e-04
Loss = 8.6268e-03, PNorm = 184.7477, GNorm = 0.1712, lr_0 = 1.4192e-04
Loss = 3.9306e-03, PNorm = 184.7502, GNorm = 0.1041, lr_0 = 1.4183e-04
Loss = 4.4513e-03, PNorm = 184.7535, GNorm = 0.1606, lr_0 = 1.4173e-04
Loss = 3.8773e-03, PNorm = 184.7552, GNorm = 0.1405, lr_0 = 1.4163e-04
Loss = 2.7005e-03, PNorm = 184.7584, GNorm = 0.0746, lr_0 = 1.4153e-04
Loss = 6.8984e-03, PNorm = 184.7634, GNorm = 0.1366, lr_0 = 1.4144e-04
Loss = 3.1644e-03, PNorm = 184.7659, GNorm = 0.0541, lr_0 = 1.4134e-04
Loss = 5.9190e-03, PNorm = 184.7683, GNorm = 0.7879, lr_0 = 1.4124e-04
Loss = 3.4524e-03, PNorm = 184.7709, GNorm = 0.0983, lr_0 = 1.4115e-04
Loss = 2.1677e-03, PNorm = 184.7740, GNorm = 0.0903, lr_0 = 1.4105e-04
Loss = 1.6036e-03, PNorm = 184.7751, GNorm = 0.1630, lr_0 = 1.4095e-04
Loss = 2.4741e-03, PNorm = 184.7765, GNorm = 0.0744, lr_0 = 1.4086e-04
Loss = 5.1584e-03, PNorm = 184.7766, GNorm = 0.6089, lr_0 = 1.4076e-04
Loss = 3.9384e-03, PNorm = 184.7780, GNorm = 0.1616, lr_0 = 1.4066e-04
Loss = 1.5687e-03, PNorm = 184.7800, GNorm = 0.1179, lr_0 = 1.4057e-04
Loss = 4.9196e-03, PNorm = 184.7818, GNorm = 0.6334, lr_0 = 1.4047e-04
Loss = 3.7743e-03, PNorm = 184.7818, GNorm = 0.1153, lr_0 = 1.4038e-04
Loss = 3.3057e-03, PNorm = 184.7829, GNorm = 0.0524, lr_0 = 1.4028e-04
Loss = 3.3162e-03, PNorm = 184.7848, GNorm = 0.1467, lr_0 = 1.4018e-04
Loss = 1.6132e-03, PNorm = 184.7877, GNorm = 0.1017, lr_0 = 1.4009e-04
Loss = 2.4698e-03, PNorm = 184.7911, GNorm = 0.2558, lr_0 = 1.3999e-04
Loss = 1.6869e-03, PNorm = 184.7944, GNorm = 0.1361, lr_0 = 1.3990e-04
Loss = 4.5020e-03, PNorm = 184.7961, GNorm = 0.1732, lr_0 = 1.3980e-04
Loss = 2.0566e-03, PNorm = 184.7991, GNorm = 0.1396, lr_0 = 1.3970e-04
Loss = 5.6293e-03, PNorm = 184.8019, GNorm = 0.3124, lr_0 = 1.3961e-04
Loss = 4.2580e-03, PNorm = 184.8045, GNorm = 0.4804, lr_0 = 1.3951e-04
Loss = 3.1541e-03, PNorm = 184.8054, GNorm = 0.2118, lr_0 = 1.3942e-04
Loss = 6.3397e-03, PNorm = 184.8077, GNorm = 0.0874, lr_0 = 1.3932e-04
Loss = 1.6432e-03, PNorm = 184.8097, GNorm = 0.1021, lr_0 = 1.3923e-04
Loss = 2.0689e-03, PNorm = 184.8123, GNorm = 0.0846, lr_0 = 1.3913e-04
Loss = 7.3977e-03, PNorm = 184.8158, GNorm = 0.1167, lr_0 = 1.3904e-04
Loss = 2.8610e-03, PNorm = 184.8189, GNorm = 0.1485, lr_0 = 1.3894e-04
Validation mae = 0.121166
Epoch 26
Loss = 3.6814e-03, PNorm = 184.8200, GNorm = 0.0864, lr_0 = 1.3884e-04
Loss = 1.9903e-03, PNorm = 184.8220, GNorm = 0.0861, lr_0 = 1.3875e-04
Loss = 2.9990e-03, PNorm = 184.8233, GNorm = 0.0710, lr_0 = 1.3865e-04
Loss = 3.0915e-03, PNorm = 184.8250, GNorm = 0.2042, lr_0 = 1.3856e-04
Loss = 1.7465e-03, PNorm = 184.8255, GNorm = 0.0931, lr_0 = 1.3846e-04
Loss = 6.2323e-03, PNorm = 184.8270, GNorm = 0.4408, lr_0 = 1.3837e-04
Loss = 1.4840e-03, PNorm = 184.8288, GNorm = 0.0970, lr_0 = 1.3828e-04
Loss = 8.4199e-03, PNorm = 184.8283, GNorm = 0.1818, lr_0 = 1.3818e-04
Loss = 4.1278e-03, PNorm = 184.8310, GNorm = 0.4047, lr_0 = 1.3809e-04
Loss = 1.6680e-03, PNorm = 184.8348, GNorm = 0.1087, lr_0 = 1.3799e-04
Loss = 1.6772e-03, PNorm = 184.8363, GNorm = 0.1152, lr_0 = 1.3790e-04
Loss = 2.7435e-03, PNorm = 184.8400, GNorm = 0.0649, lr_0 = 1.3780e-04
Loss = 1.7853e-03, PNorm = 184.8411, GNorm = 0.1954, lr_0 = 1.3771e-04
Loss = 2.0645e-03, PNorm = 184.8417, GNorm = 0.1175, lr_0 = 1.3761e-04
Loss = 3.4167e-03, PNorm = 184.8438, GNorm = 0.1310, lr_0 = 1.3752e-04
Loss = 6.7920e-03, PNorm = 184.8464, GNorm = 0.0887, lr_0 = 1.3742e-04
Loss = 1.9167e-03, PNorm = 184.8468, GNorm = 0.0547, lr_0 = 1.3733e-04
Loss = 1.9983e-03, PNorm = 184.8481, GNorm = 0.1280, lr_0 = 1.3724e-04
Loss = 2.6304e-03, PNorm = 184.8499, GNorm = 0.1228, lr_0 = 1.3714e-04
Loss = 1.5010e-03, PNorm = 184.8513, GNorm = 0.0840, lr_0 = 1.3705e-04
Loss = 2.5929e-03, PNorm = 184.8538, GNorm = 0.2027, lr_0 = 1.3695e-04
Loss = 2.6421e-03, PNorm = 184.8564, GNorm = 0.0948, lr_0 = 1.3686e-04
Loss = 3.4550e-03, PNorm = 184.8586, GNorm = 0.1026, lr_0 = 1.3677e-04
Loss = 5.2508e-03, PNorm = 184.8596, GNorm = 0.1531, lr_0 = 1.3667e-04
Loss = 3.0454e-03, PNorm = 184.8618, GNorm = 0.1665, lr_0 = 1.3658e-04
Loss = 5.5233e-03, PNorm = 184.8639, GNorm = 0.0752, lr_0 = 1.3649e-04
Loss = 5.0758e-03, PNorm = 184.8667, GNorm = 0.0726, lr_0 = 1.3639e-04
Loss = 2.0196e-03, PNorm = 184.8690, GNorm = 0.0640, lr_0 = 1.3630e-04
Loss = 1.8727e-03, PNorm = 184.8706, GNorm = 0.1025, lr_0 = 1.3621e-04
Loss = 3.8865e-03, PNorm = 184.8721, GNorm = 0.0588, lr_0 = 1.3611e-04
Loss = 2.4439e-03, PNorm = 184.8741, GNorm = 0.1398, lr_0 = 1.3602e-04
Loss = 2.5522e-03, PNorm = 184.8762, GNorm = 0.0688, lr_0 = 1.3593e-04
Loss = 1.9442e-03, PNorm = 184.8778, GNorm = 0.0619, lr_0 = 1.3583e-04
Loss = 2.4997e-03, PNorm = 184.8807, GNorm = 0.1689, lr_0 = 1.3574e-04
Loss = 8.3753e-03, PNorm = 184.8834, GNorm = 0.1457, lr_0 = 1.3565e-04
Loss = 2.3131e-03, PNorm = 184.8843, GNorm = 0.1211, lr_0 = 1.3555e-04
Loss = 1.8557e-03, PNorm = 184.8854, GNorm = 0.0857, lr_0 = 1.3546e-04
Loss = 4.3357e-03, PNorm = 184.8876, GNorm = 0.0791, lr_0 = 1.3537e-04
Loss = 1.8913e-03, PNorm = 184.8898, GNorm = 0.0914, lr_0 = 1.3528e-04
Loss = 2.3726e-03, PNorm = 184.8913, GNorm = 0.1433, lr_0 = 1.3518e-04
Loss = 1.6223e-03, PNorm = 184.8928, GNorm = 0.0892, lr_0 = 1.3509e-04
Loss = 5.9419e-03, PNorm = 184.8948, GNorm = 0.3137, lr_0 = 1.3500e-04
Loss = 5.5401e-03, PNorm = 184.8960, GNorm = 0.0513, lr_0 = 1.3491e-04
Loss = 5.6358e-03, PNorm = 184.8974, GNorm = 0.0703, lr_0 = 1.3481e-04
Loss = 1.3822e-03, PNorm = 184.8985, GNorm = 0.0550, lr_0 = 1.3472e-04
Loss = 8.4021e-03, PNorm = 184.8999, GNorm = 0.2784, lr_0 = 1.3463e-04
Loss = 2.4483e-03, PNorm = 184.9026, GNorm = 0.1818, lr_0 = 1.3454e-04
Loss = 3.0387e-03, PNorm = 184.9048, GNorm = 0.2518, lr_0 = 1.3444e-04
Loss = 4.4307e-03, PNorm = 184.9071, GNorm = 0.0811, lr_0 = 1.3435e-04
Loss = 3.2044e-03, PNorm = 184.9107, GNorm = 0.1186, lr_0 = 1.3426e-04
Loss = 3.6729e-03, PNorm = 184.9136, GNorm = 0.0681, lr_0 = 1.3417e-04
Loss = 3.0024e-03, PNorm = 184.9152, GNorm = 0.1209, lr_0 = 1.3408e-04
Loss = 3.8235e-03, PNorm = 184.9165, GNorm = 0.0640, lr_0 = 1.3398e-04
Loss = 2.7406e-03, PNorm = 184.9178, GNorm = 0.1949, lr_0 = 1.3389e-04
Loss = 1.6829e-03, PNorm = 184.9189, GNorm = 0.1088, lr_0 = 1.3380e-04
Loss = 2.3785e-03, PNorm = 184.9214, GNorm = 0.1445, lr_0 = 1.3371e-04
Loss = 2.9798e-03, PNorm = 184.9244, GNorm = 0.2034, lr_0 = 1.3362e-04
Loss = 3.9939e-03, PNorm = 184.9262, GNorm = 0.1385, lr_0 = 1.3353e-04
Loss = 4.5242e-03, PNorm = 184.9272, GNorm = 0.0921, lr_0 = 1.3343e-04
Loss = 1.6721e-03, PNorm = 184.9291, GNorm = 0.1062, lr_0 = 1.3334e-04
Loss = 5.2564e-03, PNorm = 184.9315, GNorm = 0.0641, lr_0 = 1.3325e-04
Loss = 1.9449e-03, PNorm = 184.9339, GNorm = 0.0906, lr_0 = 1.3316e-04
Loss = 2.3983e-03, PNorm = 184.9359, GNorm = 0.0947, lr_0 = 1.3307e-04
Loss = 6.3657e-03, PNorm = 184.9376, GNorm = 0.2020, lr_0 = 1.3298e-04
Loss = 4.0388e-03, PNorm = 184.9401, GNorm = 0.0845, lr_0 = 1.3289e-04
Loss = 2.2026e-03, PNorm = 184.9440, GNorm = 0.2007, lr_0 = 1.3280e-04
Loss = 5.4290e-03, PNorm = 184.9461, GNorm = 1.2385, lr_0 = 1.3270e-04
Loss = 5.4621e-03, PNorm = 184.9490, GNorm = 0.0531, lr_0 = 1.3261e-04
Loss = 3.3923e-03, PNorm = 184.9503, GNorm = 0.0628, lr_0 = 1.3252e-04
Loss = 1.9609e-03, PNorm = 184.9523, GNorm = 0.0673, lr_0 = 1.3243e-04
Loss = 3.6563e-03, PNorm = 184.9529, GNorm = 0.0449, lr_0 = 1.3234e-04
Loss = 2.3239e-03, PNorm = 184.9544, GNorm = 0.0970, lr_0 = 1.3225e-04
Loss = 3.9399e-03, PNorm = 184.9556, GNorm = 0.2176, lr_0 = 1.3216e-04
Loss = 6.2734e-03, PNorm = 184.9579, GNorm = 0.0937, lr_0 = 1.3207e-04
Loss = 3.4950e-03, PNorm = 184.9600, GNorm = 0.0582, lr_0 = 1.3198e-04
Loss = 5.4813e-03, PNorm = 184.9626, GNorm = 0.1805, lr_0 = 1.3189e-04
Loss = 3.0014e-03, PNorm = 184.9653, GNorm = 0.1910, lr_0 = 1.3180e-04
Loss = 1.3268e-02, PNorm = 184.9676, GNorm = 0.0808, lr_0 = 1.3171e-04
Loss = 2.4923e-03, PNorm = 184.9689, GNorm = 0.1487, lr_0 = 1.3162e-04
Loss = 6.6881e-03, PNorm = 184.9697, GNorm = 0.1797, lr_0 = 1.3153e-04
Loss = 6.0826e-03, PNorm = 184.9708, GNorm = 0.0983, lr_0 = 1.3144e-04
Loss = 1.6531e-03, PNorm = 184.9714, GNorm = 0.0979, lr_0 = 1.3135e-04
Loss = 2.7600e-03, PNorm = 184.9723, GNorm = 0.0995, lr_0 = 1.3126e-04
Loss = 4.0543e-03, PNorm = 184.9738, GNorm = 0.0958, lr_0 = 1.3117e-04
Loss = 3.5387e-03, PNorm = 184.9749, GNorm = 0.1377, lr_0 = 1.3108e-04
Loss = 2.2049e-03, PNorm = 184.9760, GNorm = 0.0991, lr_0 = 1.3099e-04
Loss = 1.7891e-03, PNorm = 184.9781, GNorm = 0.0559, lr_0 = 1.3090e-04
Loss = 3.3623e-03, PNorm = 184.9802, GNorm = 0.1324, lr_0 = 1.3081e-04
Loss = 6.9077e-03, PNorm = 184.9823, GNorm = 0.1307, lr_0 = 1.3072e-04
Loss = 6.7759e-03, PNorm = 184.9845, GNorm = 0.0509, lr_0 = 1.3063e-04
Loss = 2.1773e-03, PNorm = 184.9870, GNorm = 0.1542, lr_0 = 1.3054e-04
Loss = 3.8701e-03, PNorm = 184.9890, GNorm = 0.1100, lr_0 = 1.3045e-04
Loss = 2.1222e-03, PNorm = 184.9909, GNorm = 0.0842, lr_0 = 1.3036e-04
Loss = 2.9958e-03, PNorm = 184.9927, GNorm = 0.1526, lr_0 = 1.3027e-04
Loss = 2.7412e-03, PNorm = 184.9949, GNorm = 0.1262, lr_0 = 1.3018e-04
Loss = 3.0667e-03, PNorm = 184.9977, GNorm = 0.1423, lr_0 = 1.3009e-04
Loss = 3.2365e-03, PNorm = 185.0011, GNorm = 0.0992, lr_0 = 1.3000e-04
Loss = 4.6499e-03, PNorm = 185.0027, GNorm = 0.1020, lr_0 = 1.2992e-04
Loss = 1.8254e-03, PNorm = 185.0049, GNorm = 0.0989, lr_0 = 1.2983e-04
Loss = 3.5489e-03, PNorm = 185.0053, GNorm = 0.1831, lr_0 = 1.2974e-04
Loss = 2.9617e-03, PNorm = 185.0054, GNorm = 0.1603, lr_0 = 1.2965e-04
Loss = 2.6461e-03, PNorm = 185.0071, GNorm = 0.0390, lr_0 = 1.2956e-04
Loss = 3.9505e-03, PNorm = 185.0082, GNorm = 0.2077, lr_0 = 1.2947e-04
Loss = 7.2018e-03, PNorm = 185.0093, GNorm = 0.1089, lr_0 = 1.2938e-04
Loss = 4.7502e-03, PNorm = 185.0119, GNorm = 0.1318, lr_0 = 1.2929e-04
Loss = 2.3201e-03, PNorm = 185.0150, GNorm = 0.2175, lr_0 = 1.2921e-04
Loss = 4.5620e-03, PNorm = 185.0181, GNorm = 0.0733, lr_0 = 1.2912e-04
Loss = 2.2309e-03, PNorm = 185.0211, GNorm = 0.1526, lr_0 = 1.2903e-04
Loss = 3.0493e-03, PNorm = 185.0228, GNorm = 0.1467, lr_0 = 1.2894e-04
Loss = 2.4717e-03, PNorm = 185.0239, GNorm = 0.0979, lr_0 = 1.2885e-04
Loss = 1.6048e-03, PNorm = 185.0256, GNorm = 0.0657, lr_0 = 1.2876e-04
Loss = 5.7079e-03, PNorm = 185.0274, GNorm = 0.1403, lr_0 = 1.2867e-04
Loss = 1.4796e-03, PNorm = 185.0297, GNorm = 0.1254, lr_0 = 1.2859e-04
Loss = 3.9031e-03, PNorm = 185.0313, GNorm = 0.1037, lr_0 = 1.2850e-04
Loss = 1.6555e-03, PNorm = 185.0335, GNorm = 0.1306, lr_0 = 1.2841e-04
Loss = 2.9291e-03, PNorm = 185.0356, GNorm = 0.1213, lr_0 = 1.2832e-04
Loss = 6.9822e-03, PNorm = 185.0376, GNorm = 0.1086, lr_0 = 1.2823e-04
Loss = 1.4732e-03, PNorm = 185.0391, GNorm = 0.1173, lr_0 = 1.2815e-04
Loss = 5.3818e-03, PNorm = 185.0402, GNorm = 0.5533, lr_0 = 1.2806e-04
Loss = 3.2475e-03, PNorm = 185.0420, GNorm = 0.0910, lr_0 = 1.2797e-04
Validation mae = 0.121082
Epoch 27
Loss = 1.2945e-03, PNorm = 185.0438, GNorm = 0.0693, lr_0 = 1.2788e-04
Loss = 1.8709e-03, PNorm = 185.0455, GNorm = 0.0802, lr_0 = 1.2780e-04
Loss = 2.5536e-03, PNorm = 185.0471, GNorm = 0.0525, lr_0 = 1.2771e-04
Loss = 3.2771e-03, PNorm = 185.0470, GNorm = 0.0583, lr_0 = 1.2762e-04
Loss = 2.8077e-03, PNorm = 185.0479, GNorm = 0.0676, lr_0 = 1.2753e-04
Loss = 3.0108e-03, PNorm = 185.0496, GNorm = 0.4317, lr_0 = 1.2745e-04
Loss = 2.7919e-03, PNorm = 185.0511, GNorm = 0.0460, lr_0 = 1.2736e-04
Loss = 1.2805e-03, PNorm = 185.0519, GNorm = 0.1173, lr_0 = 1.2727e-04
Loss = 4.4706e-03, PNorm = 185.0529, GNorm = 0.0761, lr_0 = 1.2718e-04
Loss = 6.4342e-03, PNorm = 185.0558, GNorm = 0.1785, lr_0 = 1.2710e-04
Loss = 4.3769e-03, PNorm = 185.0577, GNorm = 0.1151, lr_0 = 1.2701e-04
Loss = 1.2617e-03, PNorm = 185.0597, GNorm = 0.0959, lr_0 = 1.2692e-04
Loss = 5.2438e-03, PNorm = 185.0611, GNorm = 0.7063, lr_0 = 1.2684e-04
Loss = 1.7658e-03, PNorm = 185.0632, GNorm = 0.0990, lr_0 = 1.2675e-04
Loss = 6.3548e-03, PNorm = 185.0642, GNorm = 0.4371, lr_0 = 1.2666e-04
Loss = 6.5033e-03, PNorm = 185.0643, GNorm = 0.0898, lr_0 = 1.2658e-04
Loss = 1.7137e-03, PNorm = 185.0652, GNorm = 0.0651, lr_0 = 1.2649e-04
Loss = 2.6718e-03, PNorm = 185.0673, GNorm = 0.2977, lr_0 = 1.2640e-04
Loss = 4.6757e-03, PNorm = 185.0688, GNorm = 0.3331, lr_0 = 1.2632e-04
Loss = 4.2619e-03, PNorm = 185.0701, GNorm = 0.1404, lr_0 = 1.2623e-04
Loss = 4.3826e-03, PNorm = 185.0723, GNorm = 0.1047, lr_0 = 1.2614e-04
Loss = 3.6342e-03, PNorm = 185.0745, GNorm = 0.0747, lr_0 = 1.2606e-04
Loss = 4.5840e-03, PNorm = 185.0761, GNorm = 0.1877, lr_0 = 1.2597e-04
Loss = 4.4478e-03, PNorm = 185.0790, GNorm = 0.1792, lr_0 = 1.2588e-04
Loss = 4.0256e-03, PNorm = 185.0798, GNorm = 0.0399, lr_0 = 1.2580e-04
Loss = 6.4874e-03, PNorm = 185.0809, GNorm = 0.1275, lr_0 = 1.2571e-04
Loss = 7.9034e-03, PNorm = 185.0837, GNorm = 0.2683, lr_0 = 1.2563e-04
Loss = 1.9161e-03, PNorm = 185.0859, GNorm = 0.0771, lr_0 = 1.2554e-04
Loss = 1.8264e-03, PNorm = 185.0886, GNorm = 0.1657, lr_0 = 1.2545e-04
Loss = 2.9761e-03, PNorm = 185.0910, GNorm = 0.0792, lr_0 = 1.2537e-04
Loss = 3.1408e-03, PNorm = 185.0916, GNorm = 0.0874, lr_0 = 1.2528e-04
Loss = 2.5367e-03, PNorm = 185.0935, GNorm = 0.1404, lr_0 = 1.2520e-04
Loss = 3.0223e-03, PNorm = 185.0971, GNorm = 0.1499, lr_0 = 1.2511e-04
Loss = 1.0967e-03, PNorm = 185.0990, GNorm = 0.0869, lr_0 = 1.2502e-04
Loss = 5.5077e-03, PNorm = 185.1010, GNorm = 0.1319, lr_0 = 1.2494e-04
Loss = 5.8436e-03, PNorm = 185.1027, GNorm = 0.1862, lr_0 = 1.2485e-04
Loss = 2.9425e-03, PNorm = 185.1035, GNorm = 0.1449, lr_0 = 1.2477e-04
Loss = 6.7763e-03, PNorm = 185.1056, GNorm = 0.1322, lr_0 = 1.2468e-04
Loss = 1.1039e-03, PNorm = 185.1072, GNorm = 0.0801, lr_0 = 1.2460e-04
Loss = 5.1348e-03, PNorm = 185.1081, GNorm = 0.0409, lr_0 = 1.2451e-04
Loss = 1.1710e-03, PNorm = 185.1091, GNorm = 0.0397, lr_0 = 1.2443e-04
Loss = 1.1926e-03, PNorm = 185.1098, GNorm = 0.0794, lr_0 = 1.2434e-04
Loss = 2.8076e-03, PNorm = 185.1110, GNorm = 0.1850, lr_0 = 1.2426e-04
Loss = 1.3626e-03, PNorm = 185.1114, GNorm = 0.1320, lr_0 = 1.2417e-04
Loss = 3.6500e-03, PNorm = 185.1129, GNorm = 0.0952, lr_0 = 1.2409e-04
Loss = 3.1081e-03, PNorm = 185.1143, GNorm = 0.0888, lr_0 = 1.2400e-04
Loss = 2.6245e-03, PNorm = 185.1153, GNorm = 0.7368, lr_0 = 1.2392e-04
Loss = 1.7284e-03, PNorm = 185.1170, GNorm = 0.1454, lr_0 = 1.2383e-04
Loss = 1.5054e-03, PNorm = 185.1176, GNorm = 0.1111, lr_0 = 1.2375e-04
Loss = 3.9435e-03, PNorm = 185.1183, GNorm = 0.0432, lr_0 = 1.2366e-04
Loss = 1.3574e-03, PNorm = 185.1194, GNorm = 0.0815, lr_0 = 1.2358e-04
Loss = 2.5101e-03, PNorm = 185.1207, GNorm = 0.1944, lr_0 = 1.2349e-04
Loss = 1.4250e-03, PNorm = 185.1223, GNorm = 0.1029, lr_0 = 1.2341e-04
Loss = 1.1723e-03, PNorm = 185.1234, GNorm = 0.1377, lr_0 = 1.2332e-04
Loss = 4.5304e-03, PNorm = 185.1248, GNorm = 0.0510, lr_0 = 1.2324e-04
Loss = 1.6611e-03, PNorm = 185.1259, GNorm = 0.0762, lr_0 = 1.2315e-04
Loss = 1.2019e-03, PNorm = 185.1276, GNorm = 0.0780, lr_0 = 1.2307e-04
Loss = 1.5031e-03, PNorm = 185.1287, GNorm = 0.0854, lr_0 = 1.2298e-04
Loss = 2.5995e-03, PNorm = 185.1293, GNorm = 0.0545, lr_0 = 1.2290e-04
Loss = 1.7788e-03, PNorm = 185.1305, GNorm = 0.1320, lr_0 = 1.2282e-04
Loss = 2.2498e-03, PNorm = 185.1322, GNorm = 0.2193, lr_0 = 1.2273e-04
Loss = 5.0678e-03, PNorm = 185.1349, GNorm = 0.0690, lr_0 = 1.2265e-04
Loss = 2.2026e-03, PNorm = 185.1369, GNorm = 0.1209, lr_0 = 1.2256e-04
Loss = 2.8010e-03, PNorm = 185.1388, GNorm = 0.0962, lr_0 = 1.2248e-04
Loss = 1.4614e-03, PNorm = 185.1408, GNorm = 0.0795, lr_0 = 1.2240e-04
Loss = 1.1690e-03, PNorm = 185.1436, GNorm = 0.0462, lr_0 = 1.2231e-04
Loss = 3.1753e-03, PNorm = 185.1453, GNorm = 0.0407, lr_0 = 1.2223e-04
Loss = 1.8749e-03, PNorm = 185.1454, GNorm = 0.0554, lr_0 = 1.2214e-04
Loss = 4.1322e-03, PNorm = 185.1456, GNorm = 0.1196, lr_0 = 1.2206e-04
Loss = 1.3809e-03, PNorm = 185.1455, GNorm = 0.0755, lr_0 = 1.2198e-04
Loss = 3.6125e-03, PNorm = 185.1465, GNorm = 0.2102, lr_0 = 1.2189e-04
Loss = 2.6273e-03, PNorm = 185.1484, GNorm = 0.1703, lr_0 = 1.2181e-04
Loss = 2.2739e-03, PNorm = 185.1504, GNorm = 0.0478, lr_0 = 1.2173e-04
Loss = 2.3782e-03, PNorm = 185.1523, GNorm = 0.2588, lr_0 = 1.2164e-04
Loss = 3.9243e-03, PNorm = 185.1532, GNorm = 0.1818, lr_0 = 1.2156e-04
Loss = 6.0668e-03, PNorm = 185.1553, GNorm = 0.1533, lr_0 = 1.2148e-04
Loss = 1.6570e-03, PNorm = 185.1580, GNorm = 0.0630, lr_0 = 1.2139e-04
Loss = 2.4073e-03, PNorm = 185.1583, GNorm = 0.0819, lr_0 = 1.2131e-04
Loss = 3.4578e-03, PNorm = 185.1590, GNorm = 0.0517, lr_0 = 1.2123e-04
Loss = 1.3216e-03, PNorm = 185.1593, GNorm = 0.0496, lr_0 = 1.2114e-04
Loss = 2.3023e-03, PNorm = 185.1600, GNorm = 0.1407, lr_0 = 1.2106e-04
Loss = 1.3618e-03, PNorm = 185.1620, GNorm = 0.1882, lr_0 = 1.2098e-04
Loss = 1.2425e-03, PNorm = 185.1631, GNorm = 0.0896, lr_0 = 1.2090e-04
Loss = 1.5921e-03, PNorm = 185.1635, GNorm = 0.1091, lr_0 = 1.2081e-04
Loss = 4.0432e-03, PNorm = 185.1657, GNorm = 0.2512, lr_0 = 1.2073e-04
Loss = 1.1967e-02, PNorm = 185.1695, GNorm = 0.0970, lr_0 = 1.2065e-04
Loss = 2.3346e-03, PNorm = 185.1705, GNorm = 0.0932, lr_0 = 1.2056e-04
Loss = 5.8539e-03, PNorm = 185.1724, GNorm = 0.2203, lr_0 = 1.2048e-04
Loss = 5.8185e-03, PNorm = 185.1726, GNorm = 0.1692, lr_0 = 1.2040e-04
Loss = 2.2803e-03, PNorm = 185.1751, GNorm = 0.1239, lr_0 = 1.2032e-04
Loss = 6.6813e-03, PNorm = 185.1772, GNorm = 0.1375, lr_0 = 1.2023e-04
Loss = 1.5710e-03, PNorm = 185.1789, GNorm = 0.0571, lr_0 = 1.2015e-04
Loss = 1.4858e-03, PNorm = 185.1820, GNorm = 0.1029, lr_0 = 1.2007e-04
Loss = 2.3070e-03, PNorm = 185.1845, GNorm = 0.0542, lr_0 = 1.1999e-04
Loss = 1.6191e-03, PNorm = 185.1868, GNorm = 0.0534, lr_0 = 1.1991e-04
Loss = 1.2630e-03, PNorm = 185.1886, GNorm = 0.0463, lr_0 = 1.1982e-04
Loss = 1.5151e-03, PNorm = 185.1897, GNorm = 0.2517, lr_0 = 1.1974e-04
Loss = 6.8516e-03, PNorm = 185.1905, GNorm = 0.1077, lr_0 = 1.1966e-04
Loss = 5.2270e-03, PNorm = 185.1918, GNorm = 0.0839, lr_0 = 1.1958e-04
Loss = 2.9971e-03, PNorm = 185.1937, GNorm = 0.0476, lr_0 = 1.1950e-04
Loss = 5.1714e-03, PNorm = 185.1955, GNorm = 0.2046, lr_0 = 1.1941e-04
Loss = 7.2522e-03, PNorm = 185.1978, GNorm = 0.1803, lr_0 = 1.1933e-04
Loss = 4.9273e-03, PNorm = 185.1987, GNorm = 0.1116, lr_0 = 1.1925e-04
Loss = 1.5598e-03, PNorm = 185.2003, GNorm = 0.0453, lr_0 = 1.1917e-04
Loss = 2.8603e-03, PNorm = 185.2018, GNorm = 0.1193, lr_0 = 1.1909e-04
Loss = 5.9563e-03, PNorm = 185.2027, GNorm = 0.6915, lr_0 = 1.1901e-04
Loss = 1.2831e-03, PNorm = 185.2036, GNorm = 0.0991, lr_0 = 1.1892e-04
Loss = 3.3871e-03, PNorm = 185.2050, GNorm = 0.0681, lr_0 = 1.1884e-04
Loss = 5.4274e-03, PNorm = 185.2068, GNorm = 0.0896, lr_0 = 1.1876e-04
Loss = 2.1782e-03, PNorm = 185.2090, GNorm = 0.1216, lr_0 = 1.1868e-04
Loss = 2.9979e-03, PNorm = 185.2109, GNorm = 0.1062, lr_0 = 1.1860e-04
Loss = 3.3899e-03, PNorm = 185.2131, GNorm = 0.0645, lr_0 = 1.1852e-04
Loss = 7.5646e-03, PNorm = 185.2153, GNorm = 0.1805, lr_0 = 1.1844e-04
Loss = 6.8077e-03, PNorm = 185.2163, GNorm = 0.0982, lr_0 = 1.1835e-04
Loss = 4.0129e-03, PNorm = 185.2175, GNorm = 0.0447, lr_0 = 1.1827e-04
Loss = 2.4643e-03, PNorm = 185.2202, GNorm = 0.0477, lr_0 = 1.1819e-04
Loss = 2.6910e-03, PNorm = 185.2210, GNorm = 0.0521, lr_0 = 1.1811e-04
Loss = 2.0679e-03, PNorm = 185.2220, GNorm = 0.1111, lr_0 = 1.1803e-04
Loss = 4.9112e-03, PNorm = 185.2248, GNorm = 0.4486, lr_0 = 1.1795e-04
Loss = 1.9048e-03, PNorm = 185.2273, GNorm = 0.0729, lr_0 = 1.1787e-04
Validation mae = 0.121081
Epoch 28
Loss = 1.4159e-03, PNorm = 185.2292, GNorm = 0.2323, lr_0 = 1.1779e-04
Loss = 3.5534e-03, PNorm = 185.2311, GNorm = 0.0796, lr_0 = 1.1771e-04
Loss = 1.9814e-03, PNorm = 185.2322, GNorm = 0.0823, lr_0 = 1.1763e-04
Loss = 2.3618e-03, PNorm = 185.2331, GNorm = 0.1991, lr_0 = 1.1755e-04
Loss = 1.5843e-03, PNorm = 185.2336, GNorm = 0.0912, lr_0 = 1.1747e-04
Loss = 1.0827e-03, PNorm = 185.2350, GNorm = 0.0650, lr_0 = 1.1739e-04
Loss = 3.5865e-03, PNorm = 185.2370, GNorm = 0.0482, lr_0 = 1.1730e-04
Loss = 1.1908e-03, PNorm = 185.2385, GNorm = 0.1598, lr_0 = 1.1722e-04
Loss = 3.0524e-03, PNorm = 185.2395, GNorm = 0.0944, lr_0 = 1.1714e-04
Loss = 1.1658e-03, PNorm = 185.2408, GNorm = 0.1074, lr_0 = 1.1706e-04
Loss = 3.6766e-03, PNorm = 185.2420, GNorm = 0.0724, lr_0 = 1.1698e-04
Loss = 2.3290e-03, PNorm = 185.2434, GNorm = 0.0984, lr_0 = 1.1690e-04
Loss = 1.9320e-03, PNorm = 185.2447, GNorm = 0.1216, lr_0 = 1.1682e-04
Loss = 1.7782e-03, PNorm = 185.2459, GNorm = 0.1211, lr_0 = 1.1674e-04
Loss = 1.4239e-03, PNorm = 185.2463, GNorm = 0.1405, lr_0 = 1.1666e-04
Loss = 1.0231e-03, PNorm = 185.2469, GNorm = 0.1409, lr_0 = 1.1658e-04
Loss = 7.3967e-03, PNorm = 185.2471, GNorm = 0.0572, lr_0 = 1.1650e-04
Loss = 3.2432e-03, PNorm = 185.2489, GNorm = 0.0614, lr_0 = 1.1642e-04
Loss = 2.8323e-03, PNorm = 185.2510, GNorm = 0.0678, lr_0 = 1.1634e-04
Loss = 2.5550e-03, PNorm = 185.2518, GNorm = 0.0915, lr_0 = 1.1626e-04
Loss = 3.6282e-03, PNorm = 185.2516, GNorm = 0.0407, lr_0 = 1.1618e-04
Loss = 1.5442e-03, PNorm = 185.2524, GNorm = 0.0812, lr_0 = 1.1611e-04
Loss = 4.1470e-03, PNorm = 185.2541, GNorm = 0.1832, lr_0 = 1.1603e-04
Loss = 1.1876e-03, PNorm = 185.2557, GNorm = 0.0504, lr_0 = 1.1595e-04
Loss = 5.9575e-03, PNorm = 185.2569, GNorm = 0.1574, lr_0 = 1.1587e-04
Loss = 1.7556e-03, PNorm = 185.2575, GNorm = 0.0420, lr_0 = 1.1579e-04
Loss = 1.3448e-03, PNorm = 185.2588, GNorm = 0.0841, lr_0 = 1.1571e-04
Loss = 5.0437e-03, PNorm = 185.2595, GNorm = 0.1110, lr_0 = 1.1563e-04
Loss = 1.2930e-02, PNorm = 185.2618, GNorm = 0.1247, lr_0 = 1.1555e-04
Loss = 2.4467e-03, PNorm = 185.2634, GNorm = 0.0605, lr_0 = 1.1547e-04
Loss = 1.3152e-03, PNorm = 185.2628, GNorm = 0.0712, lr_0 = 1.1539e-04
Loss = 3.8962e-03, PNorm = 185.2643, GNorm = 0.1401, lr_0 = 1.1531e-04
Loss = 3.4228e-03, PNorm = 185.2650, GNorm = 0.1942, lr_0 = 1.1523e-04
Loss = 3.4130e-03, PNorm = 185.2642, GNorm = 0.0716, lr_0 = 1.1515e-04
Loss = 1.2498e-03, PNorm = 185.2645, GNorm = 0.0792, lr_0 = 1.1508e-04
Loss = 1.2695e-03, PNorm = 185.2664, GNorm = 0.0567, lr_0 = 1.1500e-04
Loss = 1.1599e-03, PNorm = 185.2687, GNorm = 0.0475, lr_0 = 1.1492e-04
Loss = 9.6290e-03, PNorm = 185.2699, GNorm = 0.3354, lr_0 = 1.1484e-04
Loss = 1.2839e-03, PNorm = 185.2731, GNorm = 0.0904, lr_0 = 1.1476e-04
Loss = 2.6764e-03, PNorm = 185.2749, GNorm = 0.0559, lr_0 = 1.1468e-04
Loss = 2.9534e-03, PNorm = 185.2767, GNorm = 0.0938, lr_0 = 1.1460e-04
Loss = 1.5165e-03, PNorm = 185.2786, GNorm = 0.0722, lr_0 = 1.1452e-04
Loss = 7.1459e-03, PNorm = 185.2800, GNorm = 0.0464, lr_0 = 1.1445e-04
Loss = 2.0011e-03, PNorm = 185.2818, GNorm = 0.0962, lr_0 = 1.1437e-04
Loss = 1.0343e-03, PNorm = 185.2840, GNorm = 0.0607, lr_0 = 1.1429e-04
Loss = 1.5634e-03, PNorm = 185.2851, GNorm = 0.0867, lr_0 = 1.1421e-04
Loss = 3.3044e-03, PNorm = 185.2856, GNorm = 0.0703, lr_0 = 1.1413e-04
Loss = 4.1412e-03, PNorm = 185.2859, GNorm = 0.1327, lr_0 = 1.1405e-04
Loss = 1.0859e-03, PNorm = 185.2859, GNorm = 0.0533, lr_0 = 1.1398e-04
Loss = 2.1897e-03, PNorm = 185.2864, GNorm = 0.0972, lr_0 = 1.1390e-04
Loss = 1.5352e-03, PNorm = 185.2869, GNorm = 0.0697, lr_0 = 1.1382e-04
Loss = 8.5839e-03, PNorm = 185.2879, GNorm = 0.0754, lr_0 = 1.1374e-04
Loss = 3.9394e-03, PNorm = 185.2887, GNorm = 0.1075, lr_0 = 1.1366e-04
Loss = 4.9252e-03, PNorm = 185.2899, GNorm = 0.1222, lr_0 = 1.1359e-04
Loss = 2.8033e-03, PNorm = 185.2924, GNorm = 0.0874, lr_0 = 1.1351e-04
Loss = 5.3990e-03, PNorm = 185.2941, GNorm = 0.0683, lr_0 = 1.1343e-04
Loss = 3.8684e-03, PNorm = 185.2953, GNorm = 0.1463, lr_0 = 1.1335e-04
Loss = 1.1687e-03, PNorm = 185.2967, GNorm = 0.0766, lr_0 = 1.1328e-04
Loss = 3.8206e-03, PNorm = 185.2984, GNorm = 0.1070, lr_0 = 1.1320e-04
Loss = 3.7407e-03, PNorm = 185.3004, GNorm = 0.0975, lr_0 = 1.1312e-04
Loss = 9.0938e-04, PNorm = 185.3031, GNorm = 0.0490, lr_0 = 1.1304e-04
Loss = 4.4248e-03, PNorm = 185.3044, GNorm = 0.2463, lr_0 = 1.1297e-04
Loss = 9.6760e-04, PNorm = 185.3052, GNorm = 0.1339, lr_0 = 1.1289e-04
Loss = 2.1756e-03, PNorm = 185.3065, GNorm = 0.2096, lr_0 = 1.1281e-04
Loss = 8.8747e-04, PNorm = 185.3078, GNorm = 0.0791, lr_0 = 1.1273e-04
Loss = 1.6213e-03, PNorm = 185.3083, GNorm = 0.0743, lr_0 = 1.1266e-04
Loss = 6.7826e-03, PNorm = 185.3093, GNorm = 0.0644, lr_0 = 1.1258e-04
Loss = 3.0152e-03, PNorm = 185.3110, GNorm = 0.1075, lr_0 = 1.1250e-04
Loss = 1.5278e-03, PNorm = 185.3135, GNorm = 0.0884, lr_0 = 1.1243e-04
Loss = 1.2104e-03, PNorm = 185.3164, GNorm = 0.0915, lr_0 = 1.1235e-04
Loss = 1.9725e-03, PNorm = 185.3182, GNorm = 0.0757, lr_0 = 1.1227e-04
Loss = 3.2403e-03, PNorm = 185.3188, GNorm = 0.1022, lr_0 = 1.1219e-04
Loss = 2.5078e-03, PNorm = 185.3202, GNorm = 0.3438, lr_0 = 1.1212e-04
Loss = 5.5155e-03, PNorm = 185.3209, GNorm = 0.0366, lr_0 = 1.1204e-04
Loss = 1.9819e-03, PNorm = 185.3225, GNorm = 0.3351, lr_0 = 1.1196e-04
Loss = 2.4126e-03, PNorm = 185.3236, GNorm = 0.0469, lr_0 = 1.1189e-04
Loss = 3.5372e-03, PNorm = 185.3249, GNorm = 0.0624, lr_0 = 1.1181e-04
Loss = 8.5114e-03, PNorm = 185.3269, GNorm = 0.1539, lr_0 = 1.1173e-04
Loss = 1.7059e-03, PNorm = 185.3292, GNorm = 0.1609, lr_0 = 1.1166e-04
Loss = 4.4899e-03, PNorm = 185.3294, GNorm = 0.0476, lr_0 = 1.1158e-04
Loss = 4.5709e-03, PNorm = 185.3289, GNorm = 0.5383, lr_0 = 1.1150e-04
Loss = 3.6040e-03, PNorm = 185.3298, GNorm = 0.2019, lr_0 = 1.1143e-04
Loss = 2.8765e-03, PNorm = 185.3302, GNorm = 0.0630, lr_0 = 1.1135e-04
Loss = 4.9318e-03, PNorm = 185.3321, GNorm = 0.1111, lr_0 = 1.1128e-04
Loss = 3.4832e-03, PNorm = 185.3349, GNorm = 0.1149, lr_0 = 1.1120e-04
Loss = 2.7811e-03, PNorm = 185.3358, GNorm = 0.1254, lr_0 = 1.1112e-04
Loss = 2.0921e-03, PNorm = 185.3369, GNorm = 0.0503, lr_0 = 1.1105e-04
Loss = 5.9066e-03, PNorm = 185.3385, GNorm = 0.1097, lr_0 = 1.1097e-04
Loss = 2.5682e-03, PNorm = 185.3402, GNorm = 0.0912, lr_0 = 1.1089e-04
Loss = 1.3819e-03, PNorm = 185.3419, GNorm = 0.0876, lr_0 = 1.1082e-04
Loss = 3.9073e-03, PNorm = 185.3428, GNorm = 0.0528, lr_0 = 1.1074e-04
Loss = 1.1129e-03, PNorm = 185.3443, GNorm = 0.0738, lr_0 = 1.1067e-04
Loss = 1.4430e-03, PNorm = 185.3456, GNorm = 0.1260, lr_0 = 1.1059e-04
Loss = 1.7895e-03, PNorm = 185.3456, GNorm = 0.1243, lr_0 = 1.1052e-04
Loss = 2.5417e-03, PNorm = 185.3467, GNorm = 0.0900, lr_0 = 1.1044e-04
Loss = 5.4109e-03, PNorm = 185.3491, GNorm = 0.0985, lr_0 = 1.1036e-04
Loss = 1.8968e-03, PNorm = 185.3508, GNorm = 0.0559, lr_0 = 1.1029e-04
Loss = 2.1723e-03, PNorm = 185.3530, GNorm = 0.0918, lr_0 = 1.1021e-04
Loss = 2.3588e-03, PNorm = 185.3547, GNorm = 0.0656, lr_0 = 1.1014e-04
Loss = 3.5550e-03, PNorm = 185.3548, GNorm = 0.0921, lr_0 = 1.1006e-04
Loss = 1.1642e-03, PNorm = 185.3561, GNorm = 0.1285, lr_0 = 1.0999e-04
Loss = 2.9797e-03, PNorm = 185.3584, GNorm = 0.0596, lr_0 = 1.0991e-04
Loss = 1.8563e-03, PNorm = 185.3592, GNorm = 0.0715, lr_0 = 1.0984e-04
Loss = 1.5549e-03, PNorm = 185.3600, GNorm = 0.1030, lr_0 = 1.0976e-04
Loss = 3.8699e-03, PNorm = 185.3600, GNorm = 0.0795, lr_0 = 1.0969e-04
Loss = 3.7394e-03, PNorm = 185.3607, GNorm = 0.1066, lr_0 = 1.0961e-04
Loss = 3.3365e-03, PNorm = 185.3622, GNorm = 0.0794, lr_0 = 1.0954e-04
Loss = 2.4171e-03, PNorm = 185.3633, GNorm = 0.0657, lr_0 = 1.0946e-04
Loss = 3.7989e-03, PNorm = 185.3635, GNorm = 0.0712, lr_0 = 1.0939e-04
Loss = 1.0561e-03, PNorm = 185.3656, GNorm = 0.0431, lr_0 = 1.0931e-04
Loss = 5.2810e-03, PNorm = 185.3668, GNorm = 0.1165, lr_0 = 1.0924e-04
Loss = 3.5645e-03, PNorm = 185.3687, GNorm = 0.3735, lr_0 = 1.0916e-04
Loss = 1.0093e-03, PNorm = 185.3700, GNorm = 0.0740, lr_0 = 1.0909e-04
Loss = 8.1348e-03, PNorm = 185.3710, GNorm = 0.3434, lr_0 = 1.0901e-04
Loss = 3.6698e-03, PNorm = 185.3719, GNorm = 0.2579, lr_0 = 1.0894e-04
Loss = 2.7848e-03, PNorm = 185.3727, GNorm = 0.1278, lr_0 = 1.0886e-04
Loss = 8.7798e-04, PNorm = 185.3742, GNorm = 0.0525, lr_0 = 1.0879e-04
Loss = 2.6655e-03, PNorm = 185.3767, GNorm = 0.1397, lr_0 = 1.0871e-04
Loss = 5.5688e-03, PNorm = 185.3784, GNorm = 0.0777, lr_0 = 1.0864e-04
Loss = 2.0035e-03, PNorm = 185.3791, GNorm = 0.2112, lr_0 = 1.0856e-04
Validation mae = 0.121135
Epoch 29
Loss = 1.7667e-03, PNorm = 185.3800, GNorm = 0.0691, lr_0 = 1.0849e-04
Loss = 2.9272e-03, PNorm = 185.3801, GNorm = 0.0491, lr_0 = 1.0841e-04
Loss = 3.8713e-03, PNorm = 185.3822, GNorm = 0.1338, lr_0 = 1.0834e-04
Loss = 2.3174e-03, PNorm = 185.3836, GNorm = 0.0696, lr_0 = 1.0827e-04
Loss = 4.2948e-03, PNorm = 185.3853, GNorm = 0.0597, lr_0 = 1.0819e-04
Loss = 5.1484e-03, PNorm = 185.3869, GNorm = 0.1982, lr_0 = 1.0812e-04
Loss = 1.6181e-03, PNorm = 185.3887, GNorm = 0.0867, lr_0 = 1.0804e-04
Loss = 1.1658e-03, PNorm = 185.3905, GNorm = 0.0417, lr_0 = 1.0797e-04
Loss = 5.1521e-03, PNorm = 185.3910, GNorm = 0.0858, lr_0 = 1.0790e-04
Loss = 1.6888e-03, PNorm = 185.3924, GNorm = 0.1263, lr_0 = 1.0782e-04
Loss = 1.3310e-03, PNorm = 185.3946, GNorm = 0.0754, lr_0 = 1.0775e-04
Loss = 2.2689e-03, PNorm = 185.3964, GNorm = 0.0717, lr_0 = 1.0767e-04
Loss = 1.2127e-03, PNorm = 185.3968, GNorm = 0.0597, lr_0 = 1.0760e-04
Loss = 2.3367e-03, PNorm = 185.3973, GNorm = 0.1796, lr_0 = 1.0753e-04
Loss = 2.1634e-03, PNorm = 185.3986, GNorm = 0.0730, lr_0 = 1.0745e-04
Loss = 8.6815e-04, PNorm = 185.4003, GNorm = 0.0368, lr_0 = 1.0738e-04
Loss = 1.5469e-03, PNorm = 185.4015, GNorm = 0.1448, lr_0 = 1.0731e-04
Loss = 8.9911e-04, PNorm = 185.4026, GNorm = 0.0631, lr_0 = 1.0723e-04
Loss = 2.5935e-03, PNorm = 185.4037, GNorm = 0.1457, lr_0 = 1.0716e-04
Loss = 9.8846e-04, PNorm = 185.4050, GNorm = 0.0660, lr_0 = 1.0709e-04
Loss = 5.0410e-03, PNorm = 185.4065, GNorm = 0.0529, lr_0 = 1.0701e-04
Loss = 2.0432e-03, PNorm = 185.4079, GNorm = 0.1467, lr_0 = 1.0694e-04
Loss = 1.2873e-03, PNorm = 185.4085, GNorm = 0.0677, lr_0 = 1.0687e-04
Loss = 3.4504e-03, PNorm = 185.4092, GNorm = 0.1081, lr_0 = 1.0679e-04
Loss = 1.8802e-03, PNorm = 185.4103, GNorm = 0.0968, lr_0 = 1.0672e-04
Loss = 1.0245e-03, PNorm = 185.4118, GNorm = 0.0991, lr_0 = 1.0665e-04
Loss = 1.0199e-03, PNorm = 185.4128, GNorm = 0.0585, lr_0 = 1.0657e-04
Loss = 2.2387e-03, PNorm = 185.4135, GNorm = 0.2090, lr_0 = 1.0650e-04
Loss = 1.5711e-03, PNorm = 185.4137, GNorm = 0.0384, lr_0 = 1.0643e-04
Loss = 1.4498e-02, PNorm = 185.4155, GNorm = 0.0598, lr_0 = 1.0635e-04
Loss = 1.6859e-03, PNorm = 185.4157, GNorm = 0.1473, lr_0 = 1.0628e-04
Loss = 8.8257e-04, PNorm = 185.4157, GNorm = 0.0622, lr_0 = 1.0621e-04
Loss = 1.5310e-03, PNorm = 185.4166, GNorm = 0.0566, lr_0 = 1.0614e-04
Loss = 9.6217e-04, PNorm = 185.4178, GNorm = 0.0998, lr_0 = 1.0606e-04
Loss = 1.5469e-03, PNorm = 185.4192, GNorm = 0.1225, lr_0 = 1.0599e-04
Loss = 1.6740e-03, PNorm = 185.4204, GNorm = 0.0453, lr_0 = 1.0592e-04
Loss = 2.3251e-03, PNorm = 185.4215, GNorm = 0.0488, lr_0 = 1.0585e-04
Loss = 4.2083e-03, PNorm = 185.4228, GNorm = 0.0727, lr_0 = 1.0577e-04
Loss = 3.2792e-03, PNorm = 185.4244, GNorm = 0.1233, lr_0 = 1.0570e-04
Loss = 3.7967e-03, PNorm = 185.4259, GNorm = 0.0920, lr_0 = 1.0563e-04
Loss = 1.3392e-03, PNorm = 185.4274, GNorm = 0.1074, lr_0 = 1.0556e-04
Loss = 1.2844e-03, PNorm = 185.4285, GNorm = 0.0885, lr_0 = 1.0548e-04
Loss = 1.2580e-03, PNorm = 185.4295, GNorm = 0.1394, lr_0 = 1.0541e-04
Loss = 2.8040e-03, PNorm = 185.4299, GNorm = 0.0808, lr_0 = 1.0534e-04
Loss = 4.3189e-03, PNorm = 185.4296, GNorm = 0.0560, lr_0 = 1.0527e-04
Loss = 3.4304e-03, PNorm = 185.4309, GNorm = 0.0729, lr_0 = 1.0519e-04
Loss = 3.9283e-03, PNorm = 185.4324, GNorm = 0.0869, lr_0 = 1.0512e-04
Loss = 2.3243e-03, PNorm = 185.4336, GNorm = 0.0623, lr_0 = 1.0505e-04
Loss = 1.1724e-03, PNorm = 185.4349, GNorm = 0.1406, lr_0 = 1.0498e-04
Loss = 4.5369e-03, PNorm = 185.4363, GNorm = 0.6991, lr_0 = 1.0491e-04
Loss = 3.0518e-03, PNorm = 185.4388, GNorm = 0.1155, lr_0 = 1.0483e-04
Loss = 1.4952e-03, PNorm = 185.4399, GNorm = 0.1407, lr_0 = 1.0476e-04
Loss = 2.0010e-03, PNorm = 185.4412, GNorm = 0.0352, lr_0 = 1.0469e-04
Loss = 1.1114e-03, PNorm = 185.4427, GNorm = 0.0621, lr_0 = 1.0462e-04
Loss = 4.5730e-03, PNorm = 185.4446, GNorm = 0.0872, lr_0 = 1.0455e-04
Loss = 2.1138e-03, PNorm = 185.4470, GNorm = 0.1278, lr_0 = 1.0448e-04
Loss = 3.8086e-03, PNorm = 185.4475, GNorm = 0.2191, lr_0 = 1.0440e-04
Loss = 2.5216e-03, PNorm = 185.4479, GNorm = 0.0744, lr_0 = 1.0433e-04
Loss = 2.0673e-03, PNorm = 185.4488, GNorm = 0.1581, lr_0 = 1.0426e-04
Loss = 1.7164e-03, PNorm = 185.4497, GNorm = 0.0571, lr_0 = 1.0419e-04
Loss = 1.6812e-03, PNorm = 185.4504, GNorm = 0.1116, lr_0 = 1.0412e-04
Loss = 6.2275e-03, PNorm = 185.4527, GNorm = 0.1279, lr_0 = 1.0405e-04
Loss = 2.1283e-03, PNorm = 185.4549, GNorm = 0.0333, lr_0 = 1.0398e-04
Loss = 4.3051e-03, PNorm = 185.4551, GNorm = 0.0700, lr_0 = 1.0391e-04
Loss = 4.7154e-03, PNorm = 185.4566, GNorm = 0.0600, lr_0 = 1.0383e-04
Loss = 1.7197e-03, PNorm = 185.4574, GNorm = 0.0869, lr_0 = 1.0376e-04
Loss = 8.0338e-04, PNorm = 185.4584, GNorm = 0.0653, lr_0 = 1.0369e-04
Loss = 1.7440e-03, PNorm = 185.4599, GNorm = 0.0480, lr_0 = 1.0362e-04
Loss = 1.2172e-03, PNorm = 185.4614, GNorm = 0.0573, lr_0 = 1.0355e-04
Loss = 8.0360e-03, PNorm = 185.4633, GNorm = 0.3827, lr_0 = 1.0348e-04
Loss = 3.2880e-03, PNorm = 185.4641, GNorm = 0.2859, lr_0 = 1.0341e-04
Loss = 8.6641e-04, PNorm = 185.4670, GNorm = 0.0946, lr_0 = 1.0334e-04
Loss = 5.1252e-03, PNorm = 185.4686, GNorm = 0.3827, lr_0 = 1.0327e-04
Loss = 1.3509e-03, PNorm = 185.4702, GNorm = 0.0822, lr_0 = 1.0320e-04
Loss = 7.6620e-03, PNorm = 185.4710, GNorm = 0.1598, lr_0 = 1.0312e-04
Loss = 5.1648e-03, PNorm = 185.4711, GNorm = 0.0479, lr_0 = 1.0305e-04
Loss = 2.4309e-03, PNorm = 185.4710, GNorm = 0.0378, lr_0 = 1.0298e-04
Loss = 3.0317e-03, PNorm = 185.4719, GNorm = 0.1179, lr_0 = 1.0291e-04
Loss = 1.6588e-03, PNorm = 185.4728, GNorm = 0.1846, lr_0 = 1.0284e-04
Loss = 2.5279e-03, PNorm = 185.4729, GNorm = 0.1064, lr_0 = 1.0277e-04
Loss = 4.7617e-03, PNorm = 185.4725, GNorm = 0.1768, lr_0 = 1.0270e-04
Loss = 1.9480e-03, PNorm = 185.4741, GNorm = 0.1206, lr_0 = 1.0263e-04
Loss = 8.1002e-04, PNorm = 185.4760, GNorm = 0.1115, lr_0 = 1.0256e-04
Loss = 1.9135e-03, PNorm = 185.4782, GNorm = 0.0812, lr_0 = 1.0249e-04
Loss = 1.6535e-03, PNorm = 185.4807, GNorm = 0.2796, lr_0 = 1.0242e-04
Loss = 5.8775e-03, PNorm = 185.4818, GNorm = 0.1217, lr_0 = 1.0235e-04
Loss = 6.3781e-03, PNorm = 185.4824, GNorm = 0.4904, lr_0 = 1.0228e-04
Loss = 1.4472e-03, PNorm = 185.4842, GNorm = 0.0772, lr_0 = 1.0221e-04
Loss = 2.7314e-03, PNorm = 185.4858, GNorm = 0.0428, lr_0 = 1.0214e-04
Loss = 1.1610e-03, PNorm = 185.4876, GNorm = 0.1018, lr_0 = 1.0207e-04
Loss = 2.5694e-03, PNorm = 185.4889, GNorm = 0.0521, lr_0 = 1.0200e-04
Loss = 1.9711e-03, PNorm = 185.4899, GNorm = 0.0590, lr_0 = 1.0193e-04
Loss = 3.2977e-03, PNorm = 185.4907, GNorm = 0.1911, lr_0 = 1.0186e-04
Loss = 2.5357e-03, PNorm = 185.4913, GNorm = 0.1491, lr_0 = 1.0179e-04
Loss = 1.6486e-03, PNorm = 185.4923, GNorm = 0.0617, lr_0 = 1.0172e-04
Loss = 1.1221e-03, PNorm = 185.4940, GNorm = 0.0945, lr_0 = 1.0165e-04
Loss = 4.4407e-03, PNorm = 185.4949, GNorm = 0.1271, lr_0 = 1.0158e-04
Loss = 1.6690e-03, PNorm = 185.4963, GNorm = 0.1482, lr_0 = 1.0151e-04
Loss = 1.1689e-03, PNorm = 185.4977, GNorm = 0.0608, lr_0 = 1.0144e-04
Loss = 6.3241e-03, PNorm = 185.5004, GNorm = 0.0636, lr_0 = 1.0137e-04
Loss = 9.1235e-04, PNorm = 185.5018, GNorm = 0.0587, lr_0 = 1.0130e-04
Loss = 4.0976e-03, PNorm = 185.5021, GNorm = 0.1505, lr_0 = 1.0123e-04
Loss = 3.3486e-03, PNorm = 185.5032, GNorm = 0.2307, lr_0 = 1.0116e-04
Loss = 1.9755e-03, PNorm = 185.5041, GNorm = 0.0506, lr_0 = 1.0110e-04
Loss = 4.2231e-03, PNorm = 185.5057, GNorm = 0.1881, lr_0 = 1.0103e-04
Loss = 1.6471e-03, PNorm = 185.5076, GNorm = 0.1017, lr_0 = 1.0096e-04
Loss = 2.5642e-03, PNorm = 185.5083, GNorm = 0.0776, lr_0 = 1.0089e-04
Loss = 2.9348e-03, PNorm = 185.5094, GNorm = 0.1015, lr_0 = 1.0082e-04
Loss = 1.3186e-03, PNorm = 185.5118, GNorm = 0.1948, lr_0 = 1.0075e-04
Loss = 1.4612e-03, PNorm = 185.5135, GNorm = 0.0720, lr_0 = 1.0068e-04
Loss = 3.2267e-03, PNorm = 185.5137, GNorm = 0.0260, lr_0 = 1.0061e-04
Loss = 2.7198e-03, PNorm = 185.5147, GNorm = 0.0688, lr_0 = 1.0054e-04
Loss = 2.6372e-03, PNorm = 185.5156, GNorm = 0.0701, lr_0 = 1.0047e-04
Loss = 4.0195e-03, PNorm = 185.5159, GNorm = 0.2175, lr_0 = 1.0041e-04
Loss = 3.6243e-03, PNorm = 185.5165, GNorm = 0.1383, lr_0 = 1.0034e-04
Loss = 9.0472e-03, PNorm = 185.5191, GNorm = 0.0792, lr_0 = 1.0027e-04
Loss = 1.2461e-03, PNorm = 185.5203, GNorm = 0.0529, lr_0 = 1.0020e-04
Loss = 5.2796e-03, PNorm = 185.5210, GNorm = 0.0725, lr_0 = 1.0013e-04
Loss = 3.7531e-03, PNorm = 185.5209, GNorm = 0.0506, lr_0 = 1.0006e-04
Loss = 7.1540e-03, PNorm = 185.5196, GNorm = 0.1426, lr_0 = 1.0000e-04
Validation mae = 0.120839
Model 0 best validation mae = 0.120839 on epoch 29
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.119489
Ensemble test mae = 0.119489
Fold 1
Splitting data with seed 1
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN()
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=2048, out_features=2100, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=2100, out_features=2100, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.0, inplace=False)
    (7): Linear(in_features=2100, out_features=1, bias=True)
  )
)
Number of parameters = 8,717,101
Moving model to cuda
Epoch 0
Loss = 9.2696e-01, PNorm = 64.6279, GNorm = 2.8003, lr_0 = 1.0413e-04
Loss = 8.6792e-01, PNorm = 64.6387, GNorm = 1.7285, lr_0 = 1.0788e-04
Loss = 8.2645e-01, PNorm = 64.6500, GNorm = 2.8979, lr_0 = 1.1163e-04
Loss = 7.8010e-01, PNorm = 64.6602, GNorm = 2.6945, lr_0 = 1.1537e-04
Loss = 6.7065e-01, PNorm = 64.6696, GNorm = 2.5692, lr_0 = 1.1913e-04
Loss = 7.6983e-01, PNorm = 64.6788, GNorm = 1.8443, lr_0 = 1.2287e-04
Loss = 6.9733e-01, PNorm = 64.6890, GNorm = 1.8540, lr_0 = 1.2663e-04
Loss = 6.3066e-01, PNorm = 64.6987, GNorm = 2.3353, lr_0 = 1.3038e-04
Loss = 6.6170e-01, PNorm = 64.7093, GNorm = 2.8196, lr_0 = 1.3413e-04
Loss = 7.1242e-01, PNorm = 64.7187, GNorm = 2.9384, lr_0 = 1.3788e-04
Loss = 7.3155e-01, PNorm = 64.7286, GNorm = 4.0018, lr_0 = 1.4163e-04
Loss = 5.9635e-01, PNorm = 64.7404, GNorm = 3.0872, lr_0 = 1.4537e-04
Loss = 6.1511e-01, PNorm = 64.7534, GNorm = 2.4961, lr_0 = 1.4913e-04
Loss = 7.5783e-01, PNorm = 64.7638, GNorm = 2.8692, lr_0 = 1.5288e-04
Loss = 6.1280e-01, PNorm = 64.7743, GNorm = 2.3215, lr_0 = 1.5662e-04
Loss = 5.9470e-01, PNorm = 64.7863, GNorm = 2.5574, lr_0 = 1.6038e-04
Loss = 5.9396e-01, PNorm = 64.7984, GNorm = 3.1568, lr_0 = 1.6412e-04
Loss = 5.5018e-01, PNorm = 64.8087, GNorm = 1.9800, lr_0 = 1.6788e-04
Loss = 6.2870e-01, PNorm = 64.8179, GNorm = 2.2889, lr_0 = 1.7163e-04
Loss = 6.7689e-01, PNorm = 64.8315, GNorm = 2.3756, lr_0 = 1.7538e-04
Loss = 6.1637e-01, PNorm = 64.8453, GNorm = 2.1995, lr_0 = 1.7913e-04
Loss = 5.8881e-01, PNorm = 64.8608, GNorm = 2.2492, lr_0 = 1.8288e-04
Loss = 5.6562e-01, PNorm = 64.8748, GNorm = 3.6170, lr_0 = 1.8662e-04
Loss = 6.9804e-01, PNorm = 64.8892, GNorm = 3.0840, lr_0 = 1.9038e-04
Loss = 6.2712e-01, PNorm = 64.9057, GNorm = 3.3945, lr_0 = 1.9413e-04
Loss = 5.6721e-01, PNorm = 64.9227, GNorm = 1.5544, lr_0 = 1.9788e-04
Loss = 6.3098e-01, PNorm = 64.9369, GNorm = 2.2231, lr_0 = 2.0163e-04
Loss = 5.6354e-01, PNorm = 64.9513, GNorm = 2.0866, lr_0 = 2.0537e-04
Loss = 6.0237e-01, PNorm = 64.9690, GNorm = 1.9774, lr_0 = 2.0913e-04
Loss = 5.8992e-01, PNorm = 64.9855, GNorm = 2.1892, lr_0 = 2.1288e-04
Loss = 4.6163e-01, PNorm = 65.0015, GNorm = 2.3592, lr_0 = 2.1663e-04
Loss = 4.8177e-01, PNorm = 65.0150, GNorm = 1.4618, lr_0 = 2.2038e-04
Loss = 5.6624e-01, PNorm = 65.0302, GNorm = 1.5357, lr_0 = 2.2412e-04
Loss = 5.8002e-01, PNorm = 65.0508, GNorm = 2.5866, lr_0 = 2.2787e-04
Loss = 5.2401e-01, PNorm = 65.0706, GNorm = 2.0848, lr_0 = 2.3163e-04
Loss = 5.9079e-01, PNorm = 65.0871, GNorm = 2.3387, lr_0 = 2.3538e-04
Loss = 4.8739e-01, PNorm = 65.1067, GNorm = 1.6347, lr_0 = 2.3913e-04
Loss = 6.6853e-01, PNorm = 65.1268, GNorm = 2.2053, lr_0 = 2.4288e-04
Loss = 4.7971e-01, PNorm = 65.1507, GNorm = 1.8050, lr_0 = 2.4662e-04
Loss = 5.2668e-01, PNorm = 65.1713, GNorm = 2.9892, lr_0 = 2.5038e-04
Loss = 5.3305e-01, PNorm = 65.1915, GNorm = 2.0832, lr_0 = 2.5413e-04
Loss = 4.8759e-01, PNorm = 65.2140, GNorm = 1.3698, lr_0 = 2.5788e-04
Loss = 6.1980e-01, PNorm = 65.2346, GNorm = 1.5664, lr_0 = 2.6163e-04
Loss = 5.4730e-01, PNorm = 65.2557, GNorm = 1.5682, lr_0 = 2.6537e-04
Loss = 6.0383e-01, PNorm = 65.2793, GNorm = 1.6716, lr_0 = 2.6912e-04
Loss = 4.7077e-01, PNorm = 65.3026, GNorm = 1.5712, lr_0 = 2.7288e-04
Loss = 5.0664e-01, PNorm = 65.3273, GNorm = 1.7347, lr_0 = 2.7663e-04
Loss = 5.8273e-01, PNorm = 65.3519, GNorm = 1.8463, lr_0 = 2.8038e-04
Loss = 5.1932e-01, PNorm = 65.3787, GNorm = 1.9234, lr_0 = 2.8413e-04
Loss = 6.1811e-01, PNorm = 65.3994, GNorm = 1.5120, lr_0 = 2.8787e-04
Loss = 4.8082e-01, PNorm = 65.4274, GNorm = 1.2270, lr_0 = 2.9163e-04
Loss = 5.1842e-01, PNorm = 65.4524, GNorm = 1.4989, lr_0 = 2.9538e-04
Loss = 6.0563e-01, PNorm = 65.4762, GNorm = 1.9813, lr_0 = 2.9913e-04
Loss = 4.9652e-01, PNorm = 65.5079, GNorm = 2.3238, lr_0 = 3.0288e-04
Loss = 5.5347e-01, PNorm = 65.5344, GNorm = 1.7864, lr_0 = 3.0662e-04
Loss = 5.5878e-01, PNorm = 65.5645, GNorm = 1.9988, lr_0 = 3.1037e-04
Loss = 5.3474e-01, PNorm = 65.5934, GNorm = 1.7438, lr_0 = 3.1413e-04
Loss = 4.7869e-01, PNorm = 65.6193, GNorm = 1.5563, lr_0 = 3.1788e-04
Loss = 5.0344e-01, PNorm = 65.6473, GNorm = 1.3445, lr_0 = 3.2163e-04
Loss = 5.6655e-01, PNorm = 65.6758, GNorm = 1.6099, lr_0 = 3.2538e-04
Loss = 5.2222e-01, PNorm = 65.7056, GNorm = 1.3370, lr_0 = 3.2912e-04
Loss = 4.8676e-01, PNorm = 65.7386, GNorm = 1.4629, lr_0 = 3.3288e-04
Loss = 5.1743e-01, PNorm = 65.7705, GNorm = 1.4241, lr_0 = 3.3663e-04
Loss = 5.1637e-01, PNorm = 65.8005, GNorm = 1.4733, lr_0 = 3.4038e-04
Loss = 5.0136e-01, PNorm = 65.8342, GNorm = 1.2555, lr_0 = 3.4413e-04
Loss = 4.9737e-01, PNorm = 65.8692, GNorm = 1.6660, lr_0 = 3.4787e-04
Loss = 5.6406e-01, PNorm = 65.9045, GNorm = 1.5267, lr_0 = 3.5162e-04
Loss = 4.8305e-01, PNorm = 65.9397, GNorm = 1.4898, lr_0 = 3.5538e-04
Loss = 5.2254e-01, PNorm = 65.9788, GNorm = 1.1512, lr_0 = 3.5913e-04
Loss = 4.8204e-01, PNorm = 66.0162, GNorm = 1.2374, lr_0 = 3.6288e-04
Loss = 4.7838e-01, PNorm = 66.0533, GNorm = 2.1937, lr_0 = 3.6662e-04
Loss = 5.4944e-01, PNorm = 66.0917, GNorm = 1.3871, lr_0 = 3.7037e-04
Loss = 4.8149e-01, PNorm = 66.1319, GNorm = 1.1120, lr_0 = 3.7413e-04
Loss = 5.3695e-01, PNorm = 66.1715, GNorm = 1.4430, lr_0 = 3.7788e-04
Loss = 5.5645e-01, PNorm = 66.2102, GNorm = 1.3077, lr_0 = 3.8163e-04
Loss = 5.3687e-01, PNorm = 66.2520, GNorm = 2.0917, lr_0 = 3.8537e-04
Loss = 5.2000e-01, PNorm = 66.2959, GNorm = 2.3647, lr_0 = 3.8912e-04
Loss = 5.7938e-01, PNorm = 66.3340, GNorm = 1.4173, lr_0 = 3.9287e-04
Loss = 5.1695e-01, PNorm = 66.3827, GNorm = 1.6662, lr_0 = 3.9663e-04
Loss = 5.4827e-01, PNorm = 66.4263, GNorm = 1.2787, lr_0 = 4.0038e-04
Loss = 5.1396e-01, PNorm = 66.4680, GNorm = 1.3371, lr_0 = 4.0413e-04
Loss = 4.8665e-01, PNorm = 66.5096, GNorm = 1.1651, lr_0 = 4.0787e-04
Loss = 5.3897e-01, PNorm = 66.5571, GNorm = 1.3820, lr_0 = 4.1162e-04
Loss = 4.4990e-01, PNorm = 66.5926, GNorm = 1.6825, lr_0 = 4.1537e-04
Loss = 5.7165e-01, PNorm = 66.6348, GNorm = 2.0895, lr_0 = 4.1913e-04
Loss = 5.3102e-01, PNorm = 66.6836, GNorm = 1.7798, lr_0 = 4.2288e-04
Loss = 4.8846e-01, PNorm = 66.7284, GNorm = 1.3665, lr_0 = 4.2662e-04
Loss = 4.7018e-01, PNorm = 66.7834, GNorm = 1.6345, lr_0 = 4.3037e-04
Loss = 5.6315e-01, PNorm = 66.8268, GNorm = 1.3966, lr_0 = 4.3412e-04
Loss = 5.1952e-01, PNorm = 66.8774, GNorm = 1.6882, lr_0 = 4.3788e-04
Loss = 6.2936e-01, PNorm = 66.9254, GNorm = 1.4940, lr_0 = 4.4163e-04
Loss = 5.0803e-01, PNorm = 66.9758, GNorm = 1.0892, lr_0 = 4.4538e-04
Loss = 5.1503e-01, PNorm = 67.0293, GNorm = 2.3006, lr_0 = 4.4912e-04
Loss = 4.8358e-01, PNorm = 67.0779, GNorm = 1.0488, lr_0 = 4.5287e-04
Loss = 5.4987e-01, PNorm = 67.1304, GNorm = 2.3093, lr_0 = 4.5662e-04
Loss = 5.7178e-01, PNorm = 67.1814, GNorm = 1.3792, lr_0 = 4.6038e-04
Loss = 4.4333e-01, PNorm = 67.2340, GNorm = 1.1373, lr_0 = 4.6413e-04
Loss = 4.2673e-01, PNorm = 67.2848, GNorm = 1.2244, lr_0 = 4.6787e-04
Loss = 4.7439e-01, PNorm = 67.3327, GNorm = 1.2043, lr_0 = 4.7162e-04
Loss = 5.8034e-01, PNorm = 67.3774, GNorm = 1.2890, lr_0 = 4.7537e-04
Loss = 4.9668e-01, PNorm = 67.4323, GNorm = 0.9655, lr_0 = 4.7913e-04
Loss = 5.6626e-01, PNorm = 67.4878, GNorm = 1.7115, lr_0 = 4.8288e-04
Loss = 5.9918e-01, PNorm = 67.5479, GNorm = 1.0349, lr_0 = 4.8663e-04
Loss = 4.7575e-01, PNorm = 67.6016, GNorm = 1.4225, lr_0 = 4.9038e-04
Loss = 5.2797e-01, PNorm = 67.6586, GNorm = 1.2538, lr_0 = 4.9412e-04
Loss = 5.1625e-01, PNorm = 67.7129, GNorm = 1.2873, lr_0 = 4.9788e-04
Loss = 5.2720e-01, PNorm = 67.7766, GNorm = 1.2230, lr_0 = 5.0163e-04
Loss = 3.9373e-01, PNorm = 67.8358, GNorm = 1.2563, lr_0 = 5.0538e-04
Loss = 5.7744e-01, PNorm = 67.8906, GNorm = 1.1094, lr_0 = 5.0913e-04
Loss = 4.7724e-01, PNorm = 67.9571, GNorm = 1.6154, lr_0 = 5.1287e-04
Loss = 5.1756e-01, PNorm = 68.0280, GNorm = 1.7456, lr_0 = 5.1663e-04
Loss = 4.6609e-01, PNorm = 68.0892, GNorm = 1.5554, lr_0 = 5.2038e-04
Loss = 4.6843e-01, PNorm = 68.1624, GNorm = 1.1854, lr_0 = 5.2413e-04
Loss = 4.9617e-01, PNorm = 68.2307, GNorm = 1.2790, lr_0 = 5.2788e-04
Loss = 4.9025e-01, PNorm = 68.2888, GNorm = 1.4416, lr_0 = 5.3162e-04
Loss = 4.8639e-01, PNorm = 68.3477, GNorm = 1.1668, lr_0 = 5.3538e-04
Loss = 5.4152e-01, PNorm = 68.4169, GNorm = 1.4185, lr_0 = 5.3912e-04
Loss = 5.1075e-01, PNorm = 68.4845, GNorm = 1.1406, lr_0 = 5.4288e-04
Loss = 4.7681e-01, PNorm = 68.5467, GNorm = 1.3404, lr_0 = 5.4663e-04
Loss = 5.2302e-01, PNorm = 68.6181, GNorm = 1.0978, lr_0 = 5.5038e-04
Validation mae = 0.128017
Epoch 1
Loss = 3.8132e-01, PNorm = 68.6913, GNorm = 1.5566, lr_0 = 5.5413e-04
Loss = 3.4536e-01, PNorm = 68.7573, GNorm = 1.3210, lr_0 = 5.5787e-04
Loss = 3.8058e-01, PNorm = 68.8213, GNorm = 1.0262, lr_0 = 5.6163e-04
Loss = 4.1518e-01, PNorm = 68.8925, GNorm = 1.3199, lr_0 = 5.6538e-04
Loss = 4.0750e-01, PNorm = 68.9710, GNorm = 1.1265, lr_0 = 5.6913e-04
Loss = 3.6458e-01, PNorm = 69.0540, GNorm = 1.5652, lr_0 = 5.7288e-04
Loss = 3.8948e-01, PNorm = 69.1366, GNorm = 1.3009, lr_0 = 5.7662e-04
Loss = 3.7762e-01, PNorm = 69.2294, GNorm = 1.4862, lr_0 = 5.8038e-04
Loss = 3.4085e-01, PNorm = 69.3152, GNorm = 1.1309, lr_0 = 5.8413e-04
Loss = 3.7503e-01, PNorm = 69.4141, GNorm = 1.1166, lr_0 = 5.8788e-04
Loss = 3.6106e-01, PNorm = 69.5119, GNorm = 1.3255, lr_0 = 5.9163e-04
Loss = 3.7685e-01, PNorm = 69.6076, GNorm = 1.1324, lr_0 = 5.9538e-04
Loss = 3.6806e-01, PNorm = 69.7129, GNorm = 1.2869, lr_0 = 5.9913e-04
Loss = 4.1177e-01, PNorm = 69.8103, GNorm = 1.7097, lr_0 = 6.0288e-04
Loss = 3.8512e-01, PNorm = 69.9156, GNorm = 1.6621, lr_0 = 6.0663e-04
Loss = 3.6678e-01, PNorm = 70.0250, GNorm = 0.9881, lr_0 = 6.1038e-04
Loss = 3.3308e-01, PNorm = 70.1140, GNorm = 0.9462, lr_0 = 6.1413e-04
Loss = 3.3966e-01, PNorm = 70.1992, GNorm = 0.9816, lr_0 = 6.1788e-04
Loss = 3.8524e-01, PNorm = 70.2897, GNorm = 1.2812, lr_0 = 6.2163e-04
Loss = 4.0648e-01, PNorm = 70.3799, GNorm = 1.0114, lr_0 = 6.2538e-04
Loss = 4.2337e-01, PNorm = 70.4765, GNorm = 1.0665, lr_0 = 6.2913e-04
Loss = 4.2947e-01, PNorm = 70.5881, GNorm = 1.2925, lr_0 = 6.3288e-04
Loss = 4.2456e-01, PNorm = 70.7009, GNorm = 1.0031, lr_0 = 6.3663e-04
Loss = 4.1395e-01, PNorm = 70.8140, GNorm = 1.0467, lr_0 = 6.4038e-04
Loss = 3.6755e-01, PNorm = 70.9288, GNorm = 1.3809, lr_0 = 6.4413e-04
Loss = 3.8432e-01, PNorm = 71.0317, GNorm = 1.2024, lr_0 = 6.4788e-04
Loss = 4.0023e-01, PNorm = 71.1368, GNorm = 1.0395, lr_0 = 6.5163e-04
Loss = 3.7510e-01, PNorm = 71.2447, GNorm = 0.9988, lr_0 = 6.5538e-04
Loss = 3.3029e-01, PNorm = 71.3404, GNorm = 1.0211, lr_0 = 6.5913e-04
Loss = 4.3283e-01, PNorm = 71.4351, GNorm = 1.4758, lr_0 = 6.6288e-04
Loss = 4.1984e-01, PNorm = 71.5565, GNorm = 1.6342, lr_0 = 6.6663e-04
Loss = 4.2685e-01, PNorm = 71.6636, GNorm = 1.6024, lr_0 = 6.7038e-04
Loss = 4.2993e-01, PNorm = 71.7853, GNorm = 1.6634, lr_0 = 6.7413e-04
Loss = 3.9846e-01, PNorm = 71.9065, GNorm = 1.2389, lr_0 = 6.7788e-04
Loss = 4.8913e-01, PNorm = 72.0232, GNorm = 1.2247, lr_0 = 6.8163e-04
Loss = 4.2025e-01, PNorm = 72.1474, GNorm = 1.2088, lr_0 = 6.8538e-04
Loss = 4.1003e-01, PNorm = 72.2659, GNorm = 1.3168, lr_0 = 6.8913e-04
Loss = 3.8800e-01, PNorm = 72.3844, GNorm = 0.9262, lr_0 = 6.9288e-04
Loss = 4.3859e-01, PNorm = 72.5031, GNorm = 1.0835, lr_0 = 6.9663e-04
Loss = 4.4690e-01, PNorm = 72.6323, GNorm = 1.6598, lr_0 = 7.0038e-04
Loss = 4.7094e-01, PNorm = 72.7740, GNorm = 1.3068, lr_0 = 7.0413e-04
Loss = 4.3310e-01, PNorm = 72.9191, GNorm = 1.8015, lr_0 = 7.0788e-04
Loss = 3.6034e-01, PNorm = 73.0488, GNorm = 0.9932, lr_0 = 7.1163e-04
Loss = 4.1969e-01, PNorm = 73.1644, GNorm = 1.1383, lr_0 = 7.1538e-04
Loss = 4.2946e-01, PNorm = 73.2943, GNorm = 1.2781, lr_0 = 7.1913e-04
Loss = 3.8262e-01, PNorm = 73.4203, GNorm = 1.2038, lr_0 = 7.2288e-04
Loss = 3.6810e-01, PNorm = 73.5413, GNorm = 1.5529, lr_0 = 7.2663e-04
Loss = 4.5678e-01, PNorm = 73.6577, GNorm = 1.3400, lr_0 = 7.3038e-04
Loss = 3.9483e-01, PNorm = 73.7829, GNorm = 1.4676, lr_0 = 7.3413e-04
Loss = 4.2004e-01, PNorm = 73.9090, GNorm = 1.4585, lr_0 = 7.3788e-04
Loss = 4.8741e-01, PNorm = 74.0319, GNorm = 1.3456, lr_0 = 7.4163e-04
Loss = 4.3274e-01, PNorm = 74.1744, GNorm = 1.0089, lr_0 = 7.4538e-04
Loss = 3.9584e-01, PNorm = 74.3033, GNorm = 1.5516, lr_0 = 7.4913e-04
Loss = 3.8692e-01, PNorm = 74.4280, GNorm = 0.9326, lr_0 = 7.5288e-04
Loss = 4.1791e-01, PNorm = 74.5597, GNorm = 1.3007, lr_0 = 7.5663e-04
Loss = 4.9509e-01, PNorm = 74.6944, GNorm = 1.6499, lr_0 = 7.6038e-04
Loss = 3.9443e-01, PNorm = 74.8278, GNorm = 0.7726, lr_0 = 7.6413e-04
Loss = 4.4070e-01, PNorm = 74.9702, GNorm = 1.6850, lr_0 = 7.6788e-04
Loss = 4.5522e-01, PNorm = 75.0847, GNorm = 0.8630, lr_0 = 7.7163e-04
Loss = 4.2609e-01, PNorm = 75.2231, GNorm = 0.9390, lr_0 = 7.7538e-04
Loss = 4.6076e-01, PNorm = 75.3585, GNorm = 1.0818, lr_0 = 7.7913e-04
Loss = 4.3742e-01, PNorm = 75.4973, GNorm = 1.0620, lr_0 = 7.8288e-04
Loss = 4.4917e-01, PNorm = 75.6389, GNorm = 1.1553, lr_0 = 7.8663e-04
Loss = 3.9624e-01, PNorm = 75.7745, GNorm = 1.0931, lr_0 = 7.9038e-04
Loss = 3.9449e-01, PNorm = 75.9180, GNorm = 1.1978, lr_0 = 7.9413e-04
Loss = 5.0319e-01, PNorm = 76.0615, GNorm = 1.5720, lr_0 = 7.9788e-04
Loss = 4.0098e-01, PNorm = 76.2118, GNorm = 1.3073, lr_0 = 8.0163e-04
Loss = 3.7698e-01, PNorm = 76.3501, GNorm = 1.1186, lr_0 = 8.0538e-04
Loss = 4.2855e-01, PNorm = 76.4717, GNorm = 1.2224, lr_0 = 8.0913e-04
Loss = 4.5434e-01, PNorm = 76.6137, GNorm = 1.0740, lr_0 = 8.1288e-04
Loss = 3.8865e-01, PNorm = 76.7559, GNorm = 0.9986, lr_0 = 8.1663e-04
Loss = 4.5852e-01, PNorm = 76.9025, GNorm = 1.5226, lr_0 = 8.2038e-04
Loss = 4.4783e-01, PNorm = 77.0517, GNorm = 1.1750, lr_0 = 8.2413e-04
Loss = 4.3438e-01, PNorm = 77.2133, GNorm = 1.1145, lr_0 = 8.2788e-04
Loss = 4.0308e-01, PNorm = 77.3788, GNorm = 1.5339, lr_0 = 8.3163e-04
Loss = 4.8345e-01, PNorm = 77.5459, GNorm = 1.1945, lr_0 = 8.3538e-04
Loss = 4.1360e-01, PNorm = 77.7133, GNorm = 1.0712, lr_0 = 8.3913e-04
Loss = 4.5650e-01, PNorm = 77.8688, GNorm = 1.2940, lr_0 = 8.4288e-04
Loss = 4.4587e-01, PNorm = 78.0273, GNorm = 1.0227, lr_0 = 8.4663e-04
Loss = 3.7854e-01, PNorm = 78.1822, GNorm = 1.2669, lr_0 = 8.5038e-04
Loss = 4.4227e-01, PNorm = 78.3162, GNorm = 1.1680, lr_0 = 8.5413e-04
Loss = 4.8678e-01, PNorm = 78.4613, GNorm = 1.2557, lr_0 = 8.5788e-04
Loss = 5.0418e-01, PNorm = 78.6054, GNorm = 1.3145, lr_0 = 8.6163e-04
Loss = 4.3820e-01, PNorm = 78.7587, GNorm = 1.2114, lr_0 = 8.6538e-04
Loss = 3.9674e-01, PNorm = 78.9078, GNorm = 1.1435, lr_0 = 8.6913e-04
Loss = 4.4589e-01, PNorm = 79.0584, GNorm = 1.3947, lr_0 = 8.7288e-04
Loss = 4.3874e-01, PNorm = 79.2132, GNorm = 1.0022, lr_0 = 8.7663e-04
Loss = 4.7928e-01, PNorm = 79.3711, GNorm = 1.1810, lr_0 = 8.8038e-04
Loss = 4.0321e-01, PNorm = 79.5373, GNorm = 1.1584, lr_0 = 8.8413e-04
Loss = 4.2700e-01, PNorm = 79.6872, GNorm = 1.6239, lr_0 = 8.8788e-04
Loss = 4.3256e-01, PNorm = 79.8550, GNorm = 1.2863, lr_0 = 8.9163e-04
Loss = 4.1816e-01, PNorm = 80.0084, GNorm = 1.8311, lr_0 = 8.9538e-04
Loss = 4.0205e-01, PNorm = 80.1582, GNorm = 1.0284, lr_0 = 8.9913e-04
Loss = 4.2411e-01, PNorm = 80.3192, GNorm = 0.8287, lr_0 = 9.0288e-04
Loss = 4.0790e-01, PNorm = 80.4712, GNorm = 1.1655, lr_0 = 9.0663e-04
Loss = 4.3240e-01, PNorm = 80.6309, GNorm = 1.1395, lr_0 = 9.1038e-04
Loss = 4.4764e-01, PNorm = 80.7966, GNorm = 0.9498, lr_0 = 9.1413e-04
Loss = 4.6238e-01, PNorm = 80.9529, GNorm = 0.9430, lr_0 = 9.1788e-04
Loss = 4.0899e-01, PNorm = 81.0993, GNorm = 0.7607, lr_0 = 9.2163e-04
Loss = 4.3540e-01, PNorm = 81.2576, GNorm = 1.2772, lr_0 = 9.2538e-04
Loss = 4.3924e-01, PNorm = 81.4091, GNorm = 0.8209, lr_0 = 9.2913e-04
Loss = 4.5825e-01, PNorm = 81.5756, GNorm = 1.0823, lr_0 = 9.3288e-04
Loss = 4.2654e-01, PNorm = 81.7240, GNorm = 1.2087, lr_0 = 9.3663e-04
Loss = 4.4728e-01, PNorm = 81.8706, GNorm = 0.6983, lr_0 = 9.4038e-04
Loss = 4.0009e-01, PNorm = 82.0363, GNorm = 1.2507, lr_0 = 9.4413e-04
Loss = 4.4722e-01, PNorm = 82.1879, GNorm = 1.4057, lr_0 = 9.4788e-04
Loss = 4.5772e-01, PNorm = 82.3638, GNorm = 1.2677, lr_0 = 9.5163e-04
Loss = 4.6965e-01, PNorm = 82.5246, GNorm = 1.1801, lr_0 = 9.5538e-04
Loss = 4.3008e-01, PNorm = 82.6828, GNorm = 1.3894, lr_0 = 9.5913e-04
Loss = 4.5284e-01, PNorm = 82.8488, GNorm = 1.2541, lr_0 = 9.6288e-04
Loss = 4.4670e-01, PNorm = 83.0081, GNorm = 1.1540, lr_0 = 9.6663e-04
Loss = 4.2023e-01, PNorm = 83.1741, GNorm = 0.7678, lr_0 = 9.7038e-04
Loss = 5.6886e-01, PNorm = 83.3450, GNorm = 1.5504, lr_0 = 9.7413e-04
Loss = 3.9926e-01, PNorm = 83.5039, GNorm = 1.3596, lr_0 = 9.7788e-04
Loss = 4.1758e-01, PNorm = 83.6596, GNorm = 0.8716, lr_0 = 9.8163e-04
Loss = 3.7200e-01, PNorm = 83.8017, GNorm = 1.1736, lr_0 = 9.8537e-04
Loss = 4.5604e-01, PNorm = 83.9563, GNorm = 0.9793, lr_0 = 9.8912e-04
Loss = 4.1874e-01, PNorm = 84.1093, GNorm = 1.6841, lr_0 = 9.9288e-04
Loss = 3.8982e-01, PNorm = 84.2661, GNorm = 0.7805, lr_0 = 9.9663e-04
Loss = 4.7080e-01, PNorm = 84.4353, GNorm = 1.0770, lr_0 = 9.9993e-04
Validation mae = 0.126123
Epoch 2
Loss = 2.7768e-01, PNorm = 84.6041, GNorm = 1.0410, lr_0 = 9.9925e-04
Loss = 2.3986e-01, PNorm = 84.7621, GNorm = 0.8304, lr_0 = 9.9856e-04
Loss = 2.9926e-01, PNorm = 84.9062, GNorm = 0.5678, lr_0 = 9.9788e-04
Loss = 2.9475e-01, PNorm = 85.0531, GNorm = 1.4981, lr_0 = 9.9719e-04
Loss = 3.4999e-01, PNorm = 85.2120, GNorm = 2.3338, lr_0 = 9.9651e-04
Loss = 2.7609e-01, PNorm = 85.3582, GNorm = 0.8873, lr_0 = 9.9583e-04
Loss = 2.6542e-01, PNorm = 85.5156, GNorm = 0.9006, lr_0 = 9.9515e-04
Loss = 2.7140e-01, PNorm = 85.6572, GNorm = 1.1302, lr_0 = 9.9446e-04
Loss = 2.6685e-01, PNorm = 85.7953, GNorm = 1.2322, lr_0 = 9.9378e-04
Loss = 3.1626e-01, PNorm = 85.9620, GNorm = 0.9274, lr_0 = 9.9310e-04
Loss = 2.8382e-01, PNorm = 86.1337, GNorm = 0.8998, lr_0 = 9.9242e-04
Loss = 2.6059e-01, PNorm = 86.2945, GNorm = 1.1084, lr_0 = 9.9174e-04
Loss = 2.5494e-01, PNorm = 86.4603, GNorm = 1.2666, lr_0 = 9.9106e-04
Loss = 2.6419e-01, PNorm = 86.6196, GNorm = 1.3909, lr_0 = 9.9038e-04
Loss = 3.2710e-01, PNorm = 86.7930, GNorm = 1.4274, lr_0 = 9.8971e-04
Loss = 2.8413e-01, PNorm = 86.9704, GNorm = 1.1806, lr_0 = 9.8903e-04
Loss = 2.4954e-01, PNorm = 87.1598, GNorm = 0.9211, lr_0 = 9.8835e-04
Loss = 2.8485e-01, PNorm = 87.3240, GNorm = 0.8906, lr_0 = 9.8767e-04
Loss = 2.4910e-01, PNorm = 87.4919, GNorm = 1.0366, lr_0 = 9.8700e-04
Loss = 2.7540e-01, PNorm = 87.6597, GNorm = 1.1645, lr_0 = 9.8632e-04
Loss = 2.7895e-01, PNorm = 87.8309, GNorm = 1.1383, lr_0 = 9.8564e-04
Loss = 2.9883e-01, PNorm = 88.0049, GNorm = 1.4060, lr_0 = 9.8497e-04
Loss = 2.7141e-01, PNorm = 88.2051, GNorm = 1.1207, lr_0 = 9.8429e-04
Loss = 3.2453e-01, PNorm = 88.3721, GNorm = 1.0694, lr_0 = 9.8362e-04
Loss = 2.9188e-01, PNorm = 88.5765, GNorm = 1.2620, lr_0 = 9.8295e-04
Loss = 3.1545e-01, PNorm = 88.7857, GNorm = 1.0719, lr_0 = 9.8227e-04
Loss = 3.2172e-01, PNorm = 88.9823, GNorm = 1.0312, lr_0 = 9.8160e-04
Loss = 2.5237e-01, PNorm = 89.1808, GNorm = 1.2863, lr_0 = 9.8093e-04
Loss = 3.5383e-01, PNorm = 89.3408, GNorm = 1.1456, lr_0 = 9.8026e-04
Loss = 2.8632e-01, PNorm = 89.5254, GNorm = 0.9934, lr_0 = 9.7958e-04
Loss = 3.1090e-01, PNorm = 89.6954, GNorm = 1.1892, lr_0 = 9.7891e-04
Loss = 3.0050e-01, PNorm = 89.8743, GNorm = 0.9501, lr_0 = 9.7824e-04
Loss = 3.0357e-01, PNorm = 90.0473, GNorm = 1.0511, lr_0 = 9.7757e-04
Loss = 3.4671e-01, PNorm = 90.2235, GNorm = 1.1063, lr_0 = 9.7690e-04
Loss = 3.1275e-01, PNorm = 90.4021, GNorm = 0.6628, lr_0 = 9.7623e-04
Loss = 3.2514e-01, PNorm = 90.5733, GNorm = 1.1785, lr_0 = 9.7556e-04
Loss = 2.8844e-01, PNorm = 90.7450, GNorm = 1.0410, lr_0 = 9.7490e-04
Loss = 3.4571e-01, PNorm = 90.9177, GNorm = 0.9590, lr_0 = 9.7423e-04
Loss = 2.8055e-01, PNorm = 91.0966, GNorm = 1.1260, lr_0 = 9.7356e-04
Loss = 3.3769e-01, PNorm = 91.2965, GNorm = 1.1787, lr_0 = 9.7289e-04
Loss = 3.3295e-01, PNorm = 91.4785, GNorm = 0.9530, lr_0 = 9.7223e-04
Loss = 3.1694e-01, PNorm = 91.6672, GNorm = 2.1459, lr_0 = 9.7156e-04
Loss = 2.9164e-01, PNorm = 91.8510, GNorm = 1.0539, lr_0 = 9.7090e-04
Loss = 3.0496e-01, PNorm = 92.0186, GNorm = 0.9465, lr_0 = 9.7023e-04
Loss = 2.7286e-01, PNorm = 92.1794, GNorm = 1.2460, lr_0 = 9.6957e-04
Loss = 2.7998e-01, PNorm = 92.3312, GNorm = 1.0567, lr_0 = 9.6890e-04
Loss = 3.3560e-01, PNorm = 92.4926, GNorm = 1.0256, lr_0 = 9.6824e-04
Loss = 3.2639e-01, PNorm = 92.6714, GNorm = 0.9983, lr_0 = 9.6757e-04
Loss = 3.2194e-01, PNorm = 92.8376, GNorm = 0.7611, lr_0 = 9.6691e-04
Loss = 3.0933e-01, PNorm = 92.9984, GNorm = 1.1356, lr_0 = 9.6625e-04
Loss = 2.9635e-01, PNorm = 93.1562, GNorm = 1.1549, lr_0 = 9.6559e-04
Loss = 2.7184e-01, PNorm = 93.3038, GNorm = 1.0509, lr_0 = 9.6493e-04
Loss = 2.9405e-01, PNorm = 93.4630, GNorm = 0.7023, lr_0 = 9.6427e-04
Loss = 3.4219e-01, PNorm = 93.6226, GNorm = 1.0290, lr_0 = 9.6360e-04
Loss = 3.2334e-01, PNorm = 93.8001, GNorm = 1.3439, lr_0 = 9.6294e-04
Loss = 3.4137e-01, PNorm = 93.9694, GNorm = 1.0165, lr_0 = 9.6228e-04
Loss = 2.9164e-01, PNorm = 94.1380, GNorm = 1.2524, lr_0 = 9.6163e-04
Loss = 3.7878e-01, PNorm = 94.3095, GNorm = 0.9658, lr_0 = 9.6097e-04
Loss = 3.2681e-01, PNorm = 94.4742, GNorm = 1.7281, lr_0 = 9.6031e-04
Loss = 3.1974e-01, PNorm = 94.6308, GNorm = 0.9846, lr_0 = 9.5965e-04
Loss = 3.1551e-01, PNorm = 94.8025, GNorm = 0.9023, lr_0 = 9.5899e-04
Loss = 2.6039e-01, PNorm = 94.9504, GNorm = 0.9937, lr_0 = 9.5834e-04
Loss = 2.4443e-01, PNorm = 95.0927, GNorm = 0.7595, lr_0 = 9.5768e-04
Loss = 2.9686e-01, PNorm = 95.2360, GNorm = 1.1868, lr_0 = 9.5702e-04
Loss = 3.3783e-01, PNorm = 95.3889, GNorm = 0.7475, lr_0 = 9.5637e-04
Loss = 3.1485e-01, PNorm = 95.5729, GNorm = 1.2274, lr_0 = 9.5571e-04
Loss = 3.1654e-01, PNorm = 95.7322, GNorm = 0.8938, lr_0 = 9.5506e-04
Loss = 3.1369e-01, PNorm = 95.9026, GNorm = 0.8525, lr_0 = 9.5440e-04
Loss = 3.2251e-01, PNorm = 96.0676, GNorm = 1.6235, lr_0 = 9.5375e-04
Loss = 3.6127e-01, PNorm = 96.2296, GNorm = 1.2165, lr_0 = 9.5310e-04
Loss = 3.1254e-01, PNorm = 96.4073, GNorm = 1.3929, lr_0 = 9.5244e-04
Loss = 2.7631e-01, PNorm = 96.5765, GNorm = 1.0649, lr_0 = 9.5179e-04
Loss = 3.1059e-01, PNorm = 96.7323, GNorm = 0.9647, lr_0 = 9.5114e-04
Loss = 3.0284e-01, PNorm = 96.8940, GNorm = 1.0603, lr_0 = 9.5049e-04
Loss = 3.0154e-01, PNorm = 97.0518, GNorm = 0.6949, lr_0 = 9.4984e-04
Loss = 2.8948e-01, PNorm = 97.1991, GNorm = 0.8386, lr_0 = 9.4919e-04
Loss = 3.2686e-01, PNorm = 97.3565, GNorm = 1.6948, lr_0 = 9.4854e-04
Loss = 2.7536e-01, PNorm = 97.5020, GNorm = 1.0362, lr_0 = 9.4789e-04
Loss = 3.2691e-01, PNorm = 97.6727, GNorm = 1.2813, lr_0 = 9.4724e-04
Loss = 3.4692e-01, PNorm = 97.8169, GNorm = 1.5249, lr_0 = 9.4659e-04
Loss = 3.3780e-01, PNorm = 97.9888, GNorm = 1.0191, lr_0 = 9.4594e-04
Loss = 3.2251e-01, PNorm = 98.1455, GNorm = 0.8805, lr_0 = 9.4529e-04
Loss = 3.3235e-01, PNorm = 98.3136, GNorm = 0.9361, lr_0 = 9.4464e-04
Loss = 3.3575e-01, PNorm = 98.4679, GNorm = 1.2579, lr_0 = 9.4400e-04
Loss = 3.4486e-01, PNorm = 98.6241, GNorm = 1.0467, lr_0 = 9.4335e-04
Loss = 3.8956e-01, PNorm = 98.7980, GNorm = 2.3353, lr_0 = 9.4270e-04
Loss = 3.1981e-01, PNorm = 98.9740, GNorm = 1.2039, lr_0 = 9.4206e-04
Loss = 3.4618e-01, PNorm = 99.1469, GNorm = 1.5416, lr_0 = 9.4141e-04
Loss = 3.1288e-01, PNorm = 99.3110, GNorm = 0.8350, lr_0 = 9.4077e-04
Loss = 3.2967e-01, PNorm = 99.4685, GNorm = 1.1621, lr_0 = 9.4012e-04
Loss = 3.0298e-01, PNorm = 99.6080, GNorm = 1.5973, lr_0 = 9.3948e-04
Loss = 2.7141e-01, PNorm = 99.7328, GNorm = 1.0633, lr_0 = 9.3884e-04
Loss = 3.6836e-01, PNorm = 99.8765, GNorm = 0.9995, lr_0 = 9.3819e-04
Loss = 2.9259e-01, PNorm = 100.0193, GNorm = 0.9287, lr_0 = 9.3755e-04
Loss = 4.4786e-01, PNorm = 100.1708, GNorm = 1.9725, lr_0 = 9.3691e-04
Loss = 3.0125e-01, PNorm = 100.3292, GNorm = 1.3666, lr_0 = 9.3627e-04
Loss = 3.0108e-01, PNorm = 100.4747, GNorm = 1.1486, lr_0 = 9.3562e-04
Loss = 3.3425e-01, PNorm = 100.6240, GNorm = 1.2170, lr_0 = 9.3498e-04
Loss = 3.3606e-01, PNorm = 100.7602, GNorm = 1.3570, lr_0 = 9.3434e-04
Loss = 2.8574e-01, PNorm = 100.9120, GNorm = 1.1746, lr_0 = 9.3370e-04
Loss = 2.8454e-01, PNorm = 101.0487, GNorm = 1.0228, lr_0 = 9.3306e-04
Loss = 3.3939e-01, PNorm = 101.1778, GNorm = 1.3108, lr_0 = 9.3242e-04
Loss = 3.2106e-01, PNorm = 101.3176, GNorm = 1.4274, lr_0 = 9.3178e-04
Loss = 3.0959e-01, PNorm = 101.4449, GNorm = 1.2193, lr_0 = 9.3115e-04
Loss = 3.3131e-01, PNorm = 101.5795, GNorm = 1.3710, lr_0 = 9.3051e-04
Loss = 4.0401e-01, PNorm = 101.7123, GNorm = 2.8110, lr_0 = 9.2987e-04
Loss = 3.4568e-01, PNorm = 101.8846, GNorm = 1.5309, lr_0 = 9.2923e-04
Loss = 3.2118e-01, PNorm = 102.0386, GNorm = 1.0633, lr_0 = 9.2860e-04
Loss = 3.8395e-01, PNorm = 102.2154, GNorm = 0.9132, lr_0 = 9.2796e-04
Loss = 3.2419e-01, PNorm = 102.3809, GNorm = 1.2771, lr_0 = 9.2733e-04
Loss = 2.8530e-01, PNorm = 102.5322, GNorm = 0.9157, lr_0 = 9.2669e-04
Loss = 3.3705e-01, PNorm = 102.6868, GNorm = 0.9288, lr_0 = 9.2606e-04
Loss = 3.5343e-01, PNorm = 102.8493, GNorm = 0.9732, lr_0 = 9.2542e-04
Loss = 3.7260e-01, PNorm = 103.0077, GNorm = 1.0919, lr_0 = 9.2479e-04
Loss = 3.1521e-01, PNorm = 103.1836, GNorm = 1.2020, lr_0 = 9.2415e-04
Loss = 2.9389e-01, PNorm = 103.3288, GNorm = 2.0363, lr_0 = 9.2352e-04
Loss = 3.4221e-01, PNorm = 103.4679, GNorm = 0.7769, lr_0 = 9.2289e-04
Loss = 3.5132e-01, PNorm = 103.6213, GNorm = 1.1798, lr_0 = 9.2226e-04
Loss = 3.1863e-01, PNorm = 103.7448, GNorm = 0.9882, lr_0 = 9.2162e-04
Loss = 3.0749e-01, PNorm = 103.8760, GNorm = 1.2629, lr_0 = 9.2099e-04
Validation mae = 0.124290
Epoch 3
Loss = 2.1580e-01, PNorm = 104.0012, GNorm = 1.7097, lr_0 = 9.2036e-04
Loss = 1.8312e-01, PNorm = 104.1218, GNorm = 0.8366, lr_0 = 9.1973e-04
Loss = 1.9310e-01, PNorm = 104.2185, GNorm = 0.9338, lr_0 = 9.1910e-04
Loss = 1.8666e-01, PNorm = 104.3123, GNorm = 1.6649, lr_0 = 9.1847e-04
Loss = 1.6421e-01, PNorm = 104.4010, GNorm = 0.7122, lr_0 = 9.1784e-04
Loss = 1.6861e-01, PNorm = 104.4833, GNorm = 0.9665, lr_0 = 9.1721e-04
Loss = 1.9567e-01, PNorm = 104.5816, GNorm = 0.6850, lr_0 = 9.1658e-04
Loss = 1.9770e-01, PNorm = 104.6782, GNorm = 0.8213, lr_0 = 9.1596e-04
Loss = 1.7185e-01, PNorm = 104.7952, GNorm = 0.6876, lr_0 = 9.1533e-04
Loss = 1.7882e-01, PNorm = 104.8949, GNorm = 0.8114, lr_0 = 9.1470e-04
Loss = 1.9178e-01, PNorm = 105.0078, GNorm = 1.1133, lr_0 = 9.1408e-04
Loss = 1.5400e-01, PNorm = 105.1156, GNorm = 0.9429, lr_0 = 9.1345e-04
Loss = 1.8241e-01, PNorm = 105.2201, GNorm = 0.7021, lr_0 = 9.1282e-04
Loss = 1.6549e-01, PNorm = 105.3313, GNorm = 0.8631, lr_0 = 9.1220e-04
Loss = 1.7439e-01, PNorm = 105.4307, GNorm = 0.8543, lr_0 = 9.1157e-04
Loss = 1.8035e-01, PNorm = 105.5271, GNorm = 0.6317, lr_0 = 9.1095e-04
Loss = 1.9027e-01, PNorm = 105.6375, GNorm = 0.6698, lr_0 = 9.1032e-04
Loss = 1.8400e-01, PNorm = 105.7518, GNorm = 0.8705, lr_0 = 9.0970e-04
Loss = 1.9922e-01, PNorm = 105.8590, GNorm = 1.4828, lr_0 = 9.0908e-04
Loss = 2.0049e-01, PNorm = 105.9678, GNorm = 0.9204, lr_0 = 9.0846e-04
Loss = 1.7302e-01, PNorm = 106.0765, GNorm = 1.0293, lr_0 = 9.0783e-04
Loss = 1.8240e-01, PNorm = 106.1877, GNorm = 1.0536, lr_0 = 9.0721e-04
Loss = 1.6873e-01, PNorm = 106.3142, GNorm = 0.8426, lr_0 = 9.0659e-04
Loss = 2.0145e-01, PNorm = 106.4122, GNorm = 0.8742, lr_0 = 9.0597e-04
Loss = 1.7089e-01, PNorm = 106.5313, GNorm = 0.6909, lr_0 = 9.0535e-04
Loss = 1.8188e-01, PNorm = 106.6470, GNorm = 1.3948, lr_0 = 9.0473e-04
Loss = 1.9114e-01, PNorm = 106.7457, GNorm = 0.8006, lr_0 = 9.0411e-04
Loss = 1.7332e-01, PNorm = 106.8619, GNorm = 0.7210, lr_0 = 9.0349e-04
Loss = 1.9447e-01, PNorm = 106.9730, GNorm = 0.5705, lr_0 = 9.0287e-04
Loss = 1.5979e-01, PNorm = 107.0926, GNorm = 1.2241, lr_0 = 9.0225e-04
Loss = 1.6462e-01, PNorm = 107.1866, GNorm = 1.0218, lr_0 = 9.0163e-04
Loss = 1.8291e-01, PNorm = 107.2992, GNorm = 0.9015, lr_0 = 9.0102e-04
Loss = 2.0609e-01, PNorm = 107.3984, GNorm = 0.6960, lr_0 = 9.0040e-04
Loss = 1.7321e-01, PNorm = 107.5163, GNorm = 1.0201, lr_0 = 8.9978e-04
Loss = 1.8396e-01, PNorm = 107.6248, GNorm = 0.7673, lr_0 = 8.9916e-04
Loss = 2.3459e-01, PNorm = 107.7412, GNorm = 0.9866, lr_0 = 8.9855e-04
Loss = 1.8128e-01, PNorm = 107.8718, GNorm = 0.9884, lr_0 = 8.9793e-04
Loss = 1.8510e-01, PNorm = 107.9850, GNorm = 1.2954, lr_0 = 8.9732e-04
Loss = 1.8872e-01, PNorm = 108.1101, GNorm = 0.9844, lr_0 = 8.9670e-04
Loss = 1.8936e-01, PNorm = 108.2231, GNorm = 0.8420, lr_0 = 8.9609e-04
Loss = 1.8345e-01, PNorm = 108.3330, GNorm = 0.6879, lr_0 = 8.9548e-04
Loss = 1.6883e-01, PNorm = 108.4423, GNorm = 0.6994, lr_0 = 8.9486e-04
Loss = 1.8150e-01, PNorm = 108.5542, GNorm = 1.2952, lr_0 = 8.9425e-04
Loss = 1.7607e-01, PNorm = 108.6572, GNorm = 1.1607, lr_0 = 8.9364e-04
Loss = 2.2302e-01, PNorm = 108.7644, GNorm = 0.9275, lr_0 = 8.9302e-04
Loss = 1.8100e-01, PNorm = 108.8836, GNorm = 0.6590, lr_0 = 8.9241e-04
Loss = 2.1323e-01, PNorm = 108.9887, GNorm = 0.7123, lr_0 = 8.9180e-04
Loss = 2.5013e-01, PNorm = 109.1129, GNorm = 1.3231, lr_0 = 8.9119e-04
Loss = 2.3393e-01, PNorm = 109.2491, GNorm = 1.4096, lr_0 = 8.9058e-04
Loss = 2.1925e-01, PNorm = 109.3842, GNorm = 0.9817, lr_0 = 8.8997e-04
Loss = 2.0736e-01, PNorm = 109.5282, GNorm = 0.7514, lr_0 = 8.8936e-04
Loss = 2.1331e-01, PNorm = 109.6569, GNorm = 0.9245, lr_0 = 8.8875e-04
Loss = 2.0463e-01, PNorm = 109.7872, GNorm = 1.0090, lr_0 = 8.8814e-04
Loss = 2.0128e-01, PNorm = 109.9106, GNorm = 1.6570, lr_0 = 8.8753e-04
Loss = 1.8236e-01, PNorm = 110.0334, GNorm = 1.2464, lr_0 = 8.8693e-04
Loss = 1.9960e-01, PNorm = 110.1473, GNorm = 0.7161, lr_0 = 8.8632e-04
Loss = 1.9539e-01, PNorm = 110.2633, GNorm = 0.8763, lr_0 = 8.8571e-04
Loss = 1.9010e-01, PNorm = 110.3742, GNorm = 0.7754, lr_0 = 8.8510e-04
Loss = 2.1522e-01, PNorm = 110.4869, GNorm = 1.5348, lr_0 = 8.8450e-04
Loss = 2.2038e-01, PNorm = 110.6056, GNorm = 0.7613, lr_0 = 8.8389e-04
Loss = 1.8181e-01, PNorm = 110.7255, GNorm = 1.0045, lr_0 = 8.8329e-04
Loss = 2.2964e-01, PNorm = 110.8456, GNorm = 0.9505, lr_0 = 8.8268e-04
Loss = 1.7147e-01, PNorm = 110.9551, GNorm = 1.2171, lr_0 = 8.8208e-04
Loss = 2.0380e-01, PNorm = 111.0727, GNorm = 0.9121, lr_0 = 8.8147e-04
Loss = 2.1953e-01, PNorm = 111.2044, GNorm = 1.0016, lr_0 = 8.8087e-04
Loss = 2.0863e-01, PNorm = 111.3203, GNorm = 0.6208, lr_0 = 8.8026e-04
Loss = 2.3287e-01, PNorm = 111.4661, GNorm = 1.0773, lr_0 = 8.7966e-04
Loss = 2.2528e-01, PNorm = 111.5980, GNorm = 1.7301, lr_0 = 8.7906e-04
Loss = 2.1205e-01, PNorm = 111.7197, GNorm = 1.8097, lr_0 = 8.7846e-04
Loss = 1.9687e-01, PNorm = 111.8569, GNorm = 0.9642, lr_0 = 8.7785e-04
Loss = 2.2512e-01, PNorm = 111.9663, GNorm = 1.1031, lr_0 = 8.7725e-04
Loss = 2.1932e-01, PNorm = 112.0928, GNorm = 0.6451, lr_0 = 8.7665e-04
Loss = 2.0935e-01, PNorm = 112.2214, GNorm = 1.0211, lr_0 = 8.7605e-04
Loss = 2.3087e-01, PNorm = 112.3487, GNorm = 0.7603, lr_0 = 8.7545e-04
Loss = 1.9833e-01, PNorm = 112.4733, GNorm = 0.7001, lr_0 = 8.7485e-04
Loss = 2.2507e-01, PNorm = 112.5973, GNorm = 1.1206, lr_0 = 8.7425e-04
Loss = 2.0919e-01, PNorm = 112.7191, GNorm = 1.0911, lr_0 = 8.7365e-04
Loss = 2.1080e-01, PNorm = 112.8309, GNorm = 1.3339, lr_0 = 8.7306e-04
Loss = 2.0314e-01, PNorm = 112.9531, GNorm = 0.8173, lr_0 = 8.7246e-04
Loss = 2.1141e-01, PNorm = 113.0774, GNorm = 0.8432, lr_0 = 8.7186e-04
Loss = 2.1683e-01, PNorm = 113.2086, GNorm = 0.7393, lr_0 = 8.7126e-04
Loss = 2.1393e-01, PNorm = 113.3427, GNorm = 0.7877, lr_0 = 8.7067e-04
Loss = 2.0180e-01, PNorm = 113.4675, GNorm = 1.1579, lr_0 = 8.7007e-04
Loss = 1.5943e-01, PNorm = 113.5926, GNorm = 0.6606, lr_0 = 8.6947e-04
Loss = 1.8326e-01, PNorm = 113.7032, GNorm = 0.6916, lr_0 = 8.6888e-04
Loss = 2.0512e-01, PNorm = 113.8089, GNorm = 1.1361, lr_0 = 8.6828e-04
Loss = 1.9342e-01, PNorm = 113.9181, GNorm = 0.9825, lr_0 = 8.6769e-04
Loss = 2.2025e-01, PNorm = 114.0262, GNorm = 0.9780, lr_0 = 8.6709e-04
Loss = 2.1599e-01, PNorm = 114.1455, GNorm = 0.8005, lr_0 = 8.6650e-04
Loss = 2.0636e-01, PNorm = 114.2548, GNorm = 1.0747, lr_0 = 8.6590e-04
Loss = 2.2448e-01, PNorm = 114.3679, GNorm = 0.8615, lr_0 = 8.6531e-04
Loss = 2.2545e-01, PNorm = 114.4899, GNorm = 1.0253, lr_0 = 8.6472e-04
Loss = 2.1654e-01, PNorm = 114.6088, GNorm = 0.8005, lr_0 = 8.6413e-04
Loss = 2.1565e-01, PNorm = 114.7349, GNorm = 1.2343, lr_0 = 8.6353e-04
Loss = 2.2094e-01, PNorm = 114.8523, GNorm = 2.1652, lr_0 = 8.6294e-04
Loss = 2.0669e-01, PNorm = 114.9742, GNorm = 1.1260, lr_0 = 8.6235e-04
Loss = 1.8149e-01, PNorm = 115.0875, GNorm = 0.8949, lr_0 = 8.6176e-04
Loss = 2.2725e-01, PNorm = 115.2060, GNorm = 0.9741, lr_0 = 8.6117e-04
Loss = 2.3345e-01, PNorm = 115.3214, GNorm = 1.0341, lr_0 = 8.6058e-04
Loss = 2.6537e-01, PNorm = 115.4520, GNorm = 0.8028, lr_0 = 8.5999e-04
Loss = 2.2674e-01, PNorm = 115.5886, GNorm = 1.2727, lr_0 = 8.5940e-04
Loss = 2.2126e-01, PNorm = 115.7223, GNorm = 0.9104, lr_0 = 8.5881e-04
Loss = 2.1701e-01, PNorm = 115.8567, GNorm = 0.9584, lr_0 = 8.5823e-04
Loss = 2.1267e-01, PNorm = 115.9870, GNorm = 1.6947, lr_0 = 8.5764e-04
Loss = 1.8841e-01, PNorm = 116.1108, GNorm = 0.6953, lr_0 = 8.5705e-04
Loss = 2.2474e-01, PNorm = 116.2323, GNorm = 0.7166, lr_0 = 8.5646e-04
Loss = 2.2399e-01, PNorm = 116.3552, GNorm = 0.9165, lr_0 = 8.5588e-04
Loss = 2.3384e-01, PNorm = 116.4799, GNorm = 1.0475, lr_0 = 8.5529e-04
Loss = 2.2800e-01, PNorm = 116.6164, GNorm = 0.9561, lr_0 = 8.5470e-04
Loss = 2.3079e-01, PNorm = 116.7495, GNorm = 0.7428, lr_0 = 8.5412e-04
Loss = 2.2642e-01, PNorm = 116.8826, GNorm = 0.9732, lr_0 = 8.5353e-04
Loss = 2.1930e-01, PNorm = 117.0031, GNorm = 0.8719, lr_0 = 8.5295e-04
Loss = 2.0556e-01, PNorm = 117.1339, GNorm = 0.8323, lr_0 = 8.5236e-04
Loss = 2.1621e-01, PNorm = 117.2578, GNorm = 1.0112, lr_0 = 8.5178e-04
Loss = 2.0998e-01, PNorm = 117.3785, GNorm = 0.9067, lr_0 = 8.5120e-04
Loss = 1.6026e-01, PNorm = 117.5008, GNorm = 0.6964, lr_0 = 8.5061e-04
Loss = 2.3525e-01, PNorm = 117.6083, GNorm = 1.1231, lr_0 = 8.5003e-04
Loss = 2.2801e-01, PNorm = 117.7324, GNorm = 0.8120, lr_0 = 8.4945e-04
Loss = 2.3108e-01, PNorm = 117.8540, GNorm = 0.6847, lr_0 = 8.4887e-04
Loss = 2.2646e-01, PNorm = 117.9768, GNorm = 0.7879, lr_0 = 8.4828e-04
Validation mae = 0.126197
Epoch 4
Loss = 1.3417e-01, PNorm = 118.0951, GNorm = 0.5487, lr_0 = 8.4770e-04
Loss = 1.2194e-01, PNorm = 118.1874, GNorm = 0.6898, lr_0 = 8.4712e-04
Loss = 1.1758e-01, PNorm = 118.2673, GNorm = 0.7103, lr_0 = 8.4654e-04
Loss = 1.1019e-01, PNorm = 118.3375, GNorm = 0.6446, lr_0 = 8.4596e-04
Loss = 1.3096e-01, PNorm = 118.4100, GNorm = 0.7819, lr_0 = 8.4538e-04
Loss = 1.5609e-01, PNorm = 118.4939, GNorm = 0.7944, lr_0 = 8.4480e-04
Loss = 1.2212e-01, PNorm = 118.5606, GNorm = 0.6148, lr_0 = 8.4423e-04
Loss = 1.2458e-01, PNorm = 118.6291, GNorm = 0.6466, lr_0 = 8.4365e-04
Loss = 9.6740e-02, PNorm = 118.6975, GNorm = 0.6413, lr_0 = 8.4307e-04
Loss = 1.1513e-01, PNorm = 118.7694, GNorm = 0.8387, lr_0 = 8.4249e-04
Loss = 1.2103e-01, PNorm = 118.8468, GNorm = 0.6661, lr_0 = 8.4191e-04
Loss = 1.2920e-01, PNorm = 118.9194, GNorm = 0.7214, lr_0 = 8.4134e-04
Loss = 1.0880e-01, PNorm = 118.9941, GNorm = 0.5459, lr_0 = 8.4076e-04
Loss = 1.2563e-01, PNorm = 119.0701, GNorm = 0.4774, lr_0 = 8.4019e-04
Loss = 1.1266e-01, PNorm = 119.1346, GNorm = 0.6072, lr_0 = 8.3961e-04
Loss = 1.2221e-01, PNorm = 119.2114, GNorm = 0.6604, lr_0 = 8.3903e-04
Loss = 1.0771e-01, PNorm = 119.2816, GNorm = 0.6985, lr_0 = 8.3846e-04
Loss = 1.0282e-01, PNorm = 119.3481, GNorm = 0.4248, lr_0 = 8.3789e-04
Loss = 1.2440e-01, PNorm = 119.4221, GNorm = 0.6270, lr_0 = 8.3731e-04
Loss = 1.0814e-01, PNorm = 119.5017, GNorm = 0.7554, lr_0 = 8.3674e-04
Loss = 1.0702e-01, PNorm = 119.5810, GNorm = 0.6010, lr_0 = 8.3616e-04
Loss = 1.3282e-01, PNorm = 119.6618, GNorm = 0.7965, lr_0 = 8.3559e-04
Loss = 1.3416e-01, PNorm = 119.7370, GNorm = 0.4739, lr_0 = 8.3502e-04
Loss = 1.1361e-01, PNorm = 119.8289, GNorm = 0.6318, lr_0 = 8.3445e-04
Loss = 1.2915e-01, PNorm = 119.9042, GNorm = 0.4467, lr_0 = 8.3388e-04
Loss = 1.2627e-01, PNorm = 119.9922, GNorm = 0.9038, lr_0 = 8.3330e-04
Loss = 1.1979e-01, PNorm = 120.0713, GNorm = 0.6687, lr_0 = 8.3273e-04
Loss = 1.2054e-01, PNorm = 120.1479, GNorm = 0.4433, lr_0 = 8.3216e-04
Loss = 1.1869e-01, PNorm = 120.2286, GNorm = 0.7695, lr_0 = 8.3159e-04
Loss = 1.1329e-01, PNorm = 120.2993, GNorm = 0.9805, lr_0 = 8.3102e-04
Loss = 1.0546e-01, PNorm = 120.3686, GNorm = 0.7544, lr_0 = 8.3045e-04
Loss = 1.0735e-01, PNorm = 120.4353, GNorm = 0.6437, lr_0 = 8.2988e-04
Loss = 1.3449e-01, PNorm = 120.5129, GNorm = 0.5844, lr_0 = 8.2932e-04
Loss = 1.1813e-01, PNorm = 120.5905, GNorm = 1.1831, lr_0 = 8.2875e-04
Loss = 1.4042e-01, PNorm = 120.6679, GNorm = 0.5555, lr_0 = 8.2818e-04
Loss = 1.3069e-01, PNorm = 120.7568, GNorm = 0.7083, lr_0 = 8.2761e-04
Loss = 1.1624e-01, PNorm = 120.8353, GNorm = 0.6294, lr_0 = 8.2705e-04
Loss = 1.3793e-01, PNorm = 120.9237, GNorm = 0.8377, lr_0 = 8.2648e-04
Loss = 1.1491e-01, PNorm = 121.0137, GNorm = 0.8680, lr_0 = 8.2591e-04
Loss = 1.1772e-01, PNorm = 121.0921, GNorm = 0.6052, lr_0 = 8.2535e-04
Loss = 1.2419e-01, PNorm = 121.1805, GNorm = 0.8514, lr_0 = 8.2478e-04
Loss = 1.1601e-01, PNorm = 121.2721, GNorm = 0.6199, lr_0 = 8.2422e-04
Loss = 1.2984e-01, PNorm = 121.3544, GNorm = 0.7983, lr_0 = 8.2365e-04
Loss = 1.1744e-01, PNorm = 121.4372, GNorm = 0.8153, lr_0 = 8.2309e-04
Loss = 1.0529e-01, PNorm = 121.5174, GNorm = 0.6146, lr_0 = 8.2252e-04
Loss = 1.4499e-01, PNorm = 121.6040, GNorm = 1.5078, lr_0 = 8.2196e-04
Loss = 1.1289e-01, PNorm = 121.6962, GNorm = 0.6001, lr_0 = 8.2140e-04
Loss = 1.5363e-01, PNorm = 121.7903, GNorm = 0.6007, lr_0 = 8.2084e-04
Loss = 1.2017e-01, PNorm = 121.8866, GNorm = 1.2646, lr_0 = 8.2027e-04
Loss = 1.1401e-01, PNorm = 121.9724, GNorm = 0.6634, lr_0 = 8.1971e-04
Loss = 1.4253e-01, PNorm = 122.0657, GNorm = 0.9832, lr_0 = 8.1915e-04
Loss = 1.2739e-01, PNorm = 122.1488, GNorm = 0.6704, lr_0 = 8.1859e-04
Loss = 1.2338e-01, PNorm = 122.2388, GNorm = 1.0512, lr_0 = 8.1803e-04
Loss = 1.3793e-01, PNorm = 122.3315, GNorm = 0.9502, lr_0 = 8.1747e-04
Loss = 1.3166e-01, PNorm = 122.4356, GNorm = 0.9225, lr_0 = 8.1691e-04
Loss = 1.3668e-01, PNorm = 122.5362, GNorm = 0.6371, lr_0 = 8.1635e-04
Loss = 1.3066e-01, PNorm = 122.6365, GNorm = 0.6930, lr_0 = 8.1579e-04
Loss = 1.3111e-01, PNorm = 122.7329, GNorm = 0.7570, lr_0 = 8.1523e-04
Loss = 1.5065e-01, PNorm = 122.8281, GNorm = 1.0955, lr_0 = 8.1467e-04
Loss = 1.2576e-01, PNorm = 122.9185, GNorm = 0.9853, lr_0 = 8.1411e-04
Loss = 1.2545e-01, PNorm = 123.0109, GNorm = 1.2697, lr_0 = 8.1355e-04
Loss = 1.3320e-01, PNorm = 123.1054, GNorm = 1.1320, lr_0 = 8.1300e-04
Loss = 1.3500e-01, PNorm = 123.2000, GNorm = 0.7029, lr_0 = 8.1244e-04
Loss = 1.3210e-01, PNorm = 123.3030, GNorm = 0.5606, lr_0 = 8.1188e-04
Loss = 1.3820e-01, PNorm = 123.4016, GNorm = 0.7858, lr_0 = 8.1133e-04
Loss = 1.2736e-01, PNorm = 123.5018, GNorm = 0.7864, lr_0 = 8.1077e-04
Loss = 1.0513e-01, PNorm = 123.5971, GNorm = 0.7032, lr_0 = 8.1022e-04
Loss = 1.1805e-01, PNorm = 123.6763, GNorm = 0.9791, lr_0 = 8.0966e-04
Loss = 1.3084e-01, PNorm = 123.7666, GNorm = 0.6386, lr_0 = 8.0911e-04
Loss = 1.3354e-01, PNorm = 123.8560, GNorm = 1.0188, lr_0 = 8.0855e-04
Loss = 1.3422e-01, PNorm = 123.9546, GNorm = 0.6502, lr_0 = 8.0800e-04
Loss = 1.2718e-01, PNorm = 124.0495, GNorm = 0.6653, lr_0 = 8.0745e-04
Loss = 1.3745e-01, PNorm = 124.1482, GNorm = 0.8097, lr_0 = 8.0689e-04
Loss = 1.3160e-01, PNorm = 124.2553, GNorm = 0.8820, lr_0 = 8.0634e-04
Loss = 1.3539e-01, PNorm = 124.3453, GNorm = 0.6211, lr_0 = 8.0579e-04
Loss = 1.2703e-01, PNorm = 124.4503, GNorm = 1.0061, lr_0 = 8.0523e-04
Loss = 1.3876e-01, PNorm = 124.5429, GNorm = 0.6712, lr_0 = 8.0468e-04
Loss = 1.3775e-01, PNorm = 124.6422, GNorm = 0.9105, lr_0 = 8.0413e-04
Loss = 1.3407e-01, PNorm = 124.7404, GNorm = 1.1805, lr_0 = 8.0358e-04
Loss = 1.3221e-01, PNorm = 124.8400, GNorm = 1.1549, lr_0 = 8.0303e-04
Loss = 1.2214e-01, PNorm = 124.9390, GNorm = 0.6262, lr_0 = 8.0248e-04
Loss = 1.3780e-01, PNorm = 125.0408, GNorm = 0.7065, lr_0 = 8.0193e-04
Loss = 1.3787e-01, PNorm = 125.1419, GNorm = 0.7447, lr_0 = 8.0138e-04
Loss = 1.5120e-01, PNorm = 125.2461, GNorm = 1.3786, lr_0 = 8.0083e-04
Loss = 1.4896e-01, PNorm = 125.3437, GNorm = 1.0018, lr_0 = 8.0028e-04
Loss = 1.4479e-01, PNorm = 125.4542, GNorm = 0.8200, lr_0 = 7.9974e-04
Loss = 1.2927e-01, PNorm = 125.5630, GNorm = 0.4994, lr_0 = 7.9919e-04
Loss = 1.8014e-01, PNorm = 125.6632, GNorm = 1.2103, lr_0 = 7.9864e-04
Loss = 1.3314e-01, PNorm = 125.7715, GNorm = 0.7440, lr_0 = 7.9809e-04
Loss = 1.3850e-01, PNorm = 125.8775, GNorm = 1.0810, lr_0 = 7.9755e-04
Loss = 1.4527e-01, PNorm = 125.9832, GNorm = 0.6457, lr_0 = 7.9700e-04
Loss = 1.2736e-01, PNorm = 126.0913, GNorm = 0.6535, lr_0 = 7.9645e-04
Loss = 1.4958e-01, PNorm = 126.1967, GNorm = 0.8943, lr_0 = 7.9591e-04
Loss = 1.4070e-01, PNorm = 126.3101, GNorm = 0.7971, lr_0 = 7.9536e-04
Loss = 1.2823e-01, PNorm = 126.4173, GNorm = 0.9055, lr_0 = 7.9482e-04
Loss = 1.5447e-01, PNorm = 126.5276, GNorm = 1.1454, lr_0 = 7.9427e-04
Loss = 1.2515e-01, PNorm = 126.6365, GNorm = 0.8265, lr_0 = 7.9373e-04
Loss = 1.4871e-01, PNorm = 126.7406, GNorm = 1.1416, lr_0 = 7.9319e-04
Loss = 1.7129e-01, PNorm = 126.8494, GNorm = 0.8727, lr_0 = 7.9264e-04
Loss = 1.6296e-01, PNorm = 126.9593, GNorm = 1.9393, lr_0 = 7.9210e-04
Loss = 1.4652e-01, PNorm = 127.0787, GNorm = 1.3607, lr_0 = 7.9156e-04
Loss = 1.6782e-01, PNorm = 127.2040, GNorm = 2.2913, lr_0 = 7.9101e-04
Loss = 1.4941e-01, PNorm = 127.3145, GNorm = 0.7690, lr_0 = 7.9047e-04
Loss = 1.5529e-01, PNorm = 127.4301, GNorm = 0.8423, lr_0 = 7.8993e-04
Loss = 1.5379e-01, PNorm = 127.5432, GNorm = 0.6520, lr_0 = 7.8939e-04
Loss = 1.4337e-01, PNorm = 127.6459, GNorm = 0.9488, lr_0 = 7.8885e-04
Loss = 1.6137e-01, PNorm = 127.7470, GNorm = 1.4678, lr_0 = 7.8831e-04
Loss = 1.7754e-01, PNorm = 127.8598, GNorm = 0.7350, lr_0 = 7.8777e-04
Loss = 1.4201e-01, PNorm = 127.9775, GNorm = 1.1561, lr_0 = 7.8723e-04
Loss = 1.4141e-01, PNorm = 128.0831, GNorm = 0.8565, lr_0 = 7.8669e-04
Loss = 1.6441e-01, PNorm = 128.1843, GNorm = 0.7660, lr_0 = 7.8615e-04
Loss = 1.8103e-01, PNorm = 128.2892, GNorm = 1.3917, lr_0 = 7.8561e-04
Loss = 1.2580e-01, PNorm = 128.3950, GNorm = 0.5090, lr_0 = 7.8507e-04
Loss = 1.5735e-01, PNorm = 128.4950, GNorm = 0.7209, lr_0 = 7.8454e-04
Loss = 1.2794e-01, PNorm = 128.5943, GNorm = 0.6616, lr_0 = 7.8400e-04
Loss = 1.6440e-01, PNorm = 128.6966, GNorm = 1.0311, lr_0 = 7.8346e-04
Loss = 1.4465e-01, PNorm = 128.8008, GNorm = 0.6414, lr_0 = 7.8293e-04
Loss = 1.4290e-01, PNorm = 128.9055, GNorm = 0.6681, lr_0 = 7.8239e-04
Loss = 1.3967e-01, PNorm = 129.0068, GNorm = 0.8683, lr_0 = 7.8185e-04
Loss = 1.5896e-01, PNorm = 129.1080, GNorm = 0.6072, lr_0 = 7.8132e-04
Validation mae = 0.124450
Epoch 5
Loss = 9.7818e-02, PNorm = 129.1987, GNorm = 0.8972, lr_0 = 7.8078e-04
Loss = 7.7160e-02, PNorm = 129.2692, GNorm = 0.5347, lr_0 = 7.8025e-04
Loss = 9.0265e-02, PNorm = 129.3406, GNorm = 0.6997, lr_0 = 7.7971e-04
Loss = 9.0697e-02, PNorm = 129.4072, GNorm = 0.5783, lr_0 = 7.7918e-04
Loss = 7.8549e-02, PNorm = 129.4733, GNorm = 0.6285, lr_0 = 7.7864e-04
Loss = 8.1198e-02, PNorm = 129.5381, GNorm = 0.4717, lr_0 = 7.7811e-04
Loss = 7.6222e-02, PNorm = 129.5996, GNorm = 0.5741, lr_0 = 7.7758e-04
Loss = 8.8754e-02, PNorm = 129.6595, GNorm = 0.6292, lr_0 = 7.7705e-04
Loss = 8.9303e-02, PNorm = 129.7228, GNorm = 0.6940, lr_0 = 7.7651e-04
Loss = 8.4407e-02, PNorm = 129.7871, GNorm = 0.8768, lr_0 = 7.7598e-04
Loss = 8.6261e-02, PNorm = 129.8390, GNorm = 1.0665, lr_0 = 7.7545e-04
Loss = 8.9755e-02, PNorm = 129.8916, GNorm = 0.5372, lr_0 = 7.7492e-04
Loss = 8.1767e-02, PNorm = 129.9544, GNorm = 0.6624, lr_0 = 7.7439e-04
Loss = 9.0023e-02, PNorm = 130.0086, GNorm = 0.7245, lr_0 = 7.7386e-04
Loss = 9.8833e-02, PNorm = 130.0716, GNorm = 0.7585, lr_0 = 7.7333e-04
Loss = 7.7170e-02, PNorm = 130.1344, GNorm = 0.4059, lr_0 = 7.7280e-04
Loss = 8.1410e-02, PNorm = 130.1914, GNorm = 0.8199, lr_0 = 7.7227e-04
Loss = 9.3111e-02, PNorm = 130.2567, GNorm = 0.5877, lr_0 = 7.7174e-04
Loss = 7.9951e-02, PNorm = 130.3224, GNorm = 0.4705, lr_0 = 7.7121e-04
Loss = 8.0942e-02, PNorm = 130.3885, GNorm = 0.9328, lr_0 = 7.7068e-04
Loss = 8.4939e-02, PNorm = 130.4500, GNorm = 0.8285, lr_0 = 7.7015e-04
Loss = 9.1199e-02, PNorm = 130.5058, GNorm = 1.0284, lr_0 = 7.6963e-04
Loss = 8.8070e-02, PNorm = 130.5850, GNorm = 0.4921, lr_0 = 7.6910e-04
Loss = 7.2464e-02, PNorm = 130.6519, GNorm = 0.6920, lr_0 = 7.6857e-04
Loss = 7.7076e-02, PNorm = 130.7126, GNorm = 0.4335, lr_0 = 7.6805e-04
Loss = 7.4871e-02, PNorm = 130.7734, GNorm = 0.4177, lr_0 = 7.6752e-04
Loss = 7.5324e-02, PNorm = 130.8279, GNorm = 0.4737, lr_0 = 7.6699e-04
Loss = 9.1924e-02, PNorm = 130.8931, GNorm = 0.5943, lr_0 = 7.6647e-04
Loss = 7.3433e-02, PNorm = 130.9613, GNorm = 0.5987, lr_0 = 7.6594e-04
Loss = 7.8075e-02, PNorm = 131.0234, GNorm = 0.6781, lr_0 = 7.6542e-04
Loss = 8.4375e-02, PNorm = 131.0819, GNorm = 0.6670, lr_0 = 7.6489e-04
Loss = 7.3577e-02, PNorm = 131.1478, GNorm = 0.6068, lr_0 = 7.6437e-04
Loss = 9.1263e-02, PNorm = 131.2108, GNorm = 0.4900, lr_0 = 7.6385e-04
Loss = 9.9342e-02, PNorm = 131.2707, GNorm = 0.7050, lr_0 = 7.6332e-04
Loss = 7.3335e-02, PNorm = 131.3298, GNorm = 0.7546, lr_0 = 7.6280e-04
Loss = 8.3777e-02, PNorm = 131.3915, GNorm = 0.8996, lr_0 = 7.6228e-04
Loss = 7.9311e-02, PNorm = 131.4534, GNorm = 0.4154, lr_0 = 7.6176e-04
Loss = 8.6149e-02, PNorm = 131.5139, GNorm = 1.1013, lr_0 = 7.6123e-04
Loss = 8.4147e-02, PNorm = 131.5802, GNorm = 0.6155, lr_0 = 7.6071e-04
Loss = 8.8120e-02, PNorm = 131.6371, GNorm = 0.5599, lr_0 = 7.6019e-04
Loss = 7.2983e-02, PNorm = 131.7000, GNorm = 0.7685, lr_0 = 7.5967e-04
Loss = 9.2339e-02, PNorm = 131.7616, GNorm = 0.4378, lr_0 = 7.5915e-04
Loss = 9.2625e-02, PNorm = 131.8251, GNorm = 0.5540, lr_0 = 7.5863e-04
Loss = 9.9218e-02, PNorm = 131.8950, GNorm = 0.8618, lr_0 = 7.5811e-04
Loss = 9.7092e-02, PNorm = 131.9675, GNorm = 0.6241, lr_0 = 7.5759e-04
Loss = 9.0512e-02, PNorm = 132.0457, GNorm = 0.4480, lr_0 = 7.5707e-04
Loss = 6.8877e-02, PNorm = 132.1143, GNorm = 0.3499, lr_0 = 7.5655e-04
Loss = 9.1221e-02, PNorm = 132.1840, GNorm = 1.2787, lr_0 = 7.5603e-04
Loss = 9.2743e-02, PNorm = 132.2592, GNorm = 0.7769, lr_0 = 7.5552e-04
Loss = 8.3835e-02, PNorm = 132.3283, GNorm = 0.4050, lr_0 = 7.5500e-04
Loss = 8.7286e-02, PNorm = 132.4053, GNorm = 1.0295, lr_0 = 7.5448e-04
Loss = 9.9099e-02, PNorm = 132.4803, GNorm = 1.1031, lr_0 = 7.5397e-04
Loss = 7.2388e-02, PNorm = 132.5541, GNorm = 0.4617, lr_0 = 7.5345e-04
Loss = 8.2171e-02, PNorm = 132.6311, GNorm = 0.8772, lr_0 = 7.5293e-04
Loss = 9.5396e-02, PNorm = 132.7045, GNorm = 0.5045, lr_0 = 7.5242e-04
Loss = 8.2300e-02, PNorm = 132.7810, GNorm = 0.9521, lr_0 = 7.5190e-04
Loss = 9.7503e-02, PNorm = 132.8564, GNorm = 1.3000, lr_0 = 7.5139e-04
Loss = 9.2466e-02, PNorm = 132.9300, GNorm = 0.4379, lr_0 = 7.5087e-04
Loss = 7.3498e-02, PNorm = 133.0105, GNorm = 0.8987, lr_0 = 7.5036e-04
Loss = 9.8174e-02, PNorm = 133.0799, GNorm = 0.5814, lr_0 = 7.4984e-04
Loss = 9.0352e-02, PNorm = 133.1508, GNorm = 0.7190, lr_0 = 7.4933e-04
Loss = 6.9599e-02, PNorm = 133.2233, GNorm = 0.5939, lr_0 = 7.4882e-04
Loss = 8.0204e-02, PNorm = 133.2930, GNorm = 0.4610, lr_0 = 7.4830e-04
Loss = 9.1515e-02, PNorm = 133.3660, GNorm = 0.6716, lr_0 = 7.4779e-04
Loss = 9.1228e-02, PNorm = 133.4437, GNorm = 0.7269, lr_0 = 7.4728e-04
Loss = 7.7663e-02, PNorm = 133.5163, GNorm = 0.4269, lr_0 = 7.4677e-04
Loss = 8.9357e-02, PNorm = 133.5875, GNorm = 0.5460, lr_0 = 7.4625e-04
Loss = 9.4777e-02, PNorm = 133.6689, GNorm = 0.4323, lr_0 = 7.4574e-04
Loss = 9.1945e-02, PNorm = 133.7492, GNorm = 0.6784, lr_0 = 7.4523e-04
Loss = 8.1346e-02, PNorm = 133.8256, GNorm = 0.8206, lr_0 = 7.4472e-04
Loss = 9.2286e-02, PNorm = 133.9016, GNorm = 0.4320, lr_0 = 7.4421e-04
Loss = 1.0444e-01, PNorm = 133.9815, GNorm = 0.5876, lr_0 = 7.4370e-04
Loss = 9.8609e-02, PNorm = 134.0581, GNorm = 0.6580, lr_0 = 7.4319e-04
Loss = 1.0619e-01, PNorm = 134.1478, GNorm = 0.7492, lr_0 = 7.4268e-04
Loss = 8.6326e-02, PNorm = 134.2236, GNorm = 0.5251, lr_0 = 7.4217e-04
Loss = 9.7670e-02, PNorm = 134.3079, GNorm = 0.6022, lr_0 = 7.4167e-04
Loss = 1.1383e-01, PNorm = 134.3831, GNorm = 1.0119, lr_0 = 7.4116e-04
Loss = 9.8289e-02, PNorm = 134.4726, GNorm = 0.6402, lr_0 = 7.4065e-04
Loss = 9.6404e-02, PNorm = 134.5623, GNorm = 0.6049, lr_0 = 7.4014e-04
Loss = 1.0214e-01, PNorm = 134.6436, GNorm = 0.4770, lr_0 = 7.3964e-04
Loss = 7.8899e-02, PNorm = 134.7286, GNorm = 1.1481, lr_0 = 7.3913e-04
Loss = 9.3860e-02, PNorm = 134.8057, GNorm = 0.5829, lr_0 = 7.3862e-04
Loss = 9.2013e-02, PNorm = 134.8758, GNorm = 0.7240, lr_0 = 7.3812e-04
Loss = 9.0476e-02, PNorm = 134.9553, GNorm = 0.5257, lr_0 = 7.3761e-04
Loss = 8.6898e-02, PNorm = 135.0316, GNorm = 0.6372, lr_0 = 7.3711e-04
Loss = 8.9405e-02, PNorm = 135.1105, GNorm = 0.6305, lr_0 = 7.3660e-04
Loss = 1.0438e-01, PNorm = 135.1914, GNorm = 0.8751, lr_0 = 7.3610e-04
Loss = 8.7410e-02, PNorm = 135.2701, GNorm = 0.5742, lr_0 = 7.3559e-04
Loss = 1.0199e-01, PNorm = 135.3530, GNorm = 0.8528, lr_0 = 7.3509e-04
Loss = 8.8704e-02, PNorm = 135.4427, GNorm = 0.8352, lr_0 = 7.3458e-04
Loss = 8.2582e-02, PNorm = 135.5226, GNorm = 0.7240, lr_0 = 7.3408e-04
Loss = 8.1781e-02, PNorm = 135.6020, GNorm = 0.5434, lr_0 = 7.3358e-04
Loss = 8.5350e-02, PNorm = 135.6799, GNorm = 0.5803, lr_0 = 7.3308e-04
Loss = 1.0197e-01, PNorm = 135.7590, GNorm = 0.6270, lr_0 = 7.3257e-04
Loss = 1.3045e-01, PNorm = 135.8377, GNorm = 0.8059, lr_0 = 7.3207e-04
Loss = 1.0431e-01, PNorm = 135.9330, GNorm = 0.7608, lr_0 = 7.3157e-04
Loss = 9.5567e-02, PNorm = 136.0250, GNorm = 0.7398, lr_0 = 7.3107e-04
Loss = 8.7925e-02, PNorm = 136.1053, GNorm = 0.4813, lr_0 = 7.3057e-04
Loss = 1.0390e-01, PNorm = 136.1931, GNorm = 0.8563, lr_0 = 7.3007e-04
Loss = 1.0969e-01, PNorm = 136.2771, GNorm = 0.6074, lr_0 = 7.2957e-04
Loss = 1.0060e-01, PNorm = 136.3550, GNorm = 0.5331, lr_0 = 7.2907e-04
Loss = 9.5709e-02, PNorm = 136.4314, GNorm = 0.5737, lr_0 = 7.2857e-04
Loss = 8.8149e-02, PNorm = 136.4974, GNorm = 0.4310, lr_0 = 7.2807e-04
Loss = 1.0650e-01, PNorm = 136.5772, GNorm = 0.6055, lr_0 = 7.2757e-04
Loss = 8.5395e-02, PNorm = 136.6648, GNorm = 0.7204, lr_0 = 7.2707e-04
Loss = 9.8010e-02, PNorm = 136.7386, GNorm = 0.6220, lr_0 = 7.2657e-04
Loss = 8.6981e-02, PNorm = 136.8198, GNorm = 0.9512, lr_0 = 7.2608e-04
Loss = 9.5083e-02, PNorm = 136.8969, GNorm = 0.7325, lr_0 = 7.2558e-04
Loss = 9.6042e-02, PNorm = 136.9741, GNorm = 0.7213, lr_0 = 7.2508e-04
Loss = 8.8848e-02, PNorm = 137.0530, GNorm = 0.7520, lr_0 = 7.2458e-04
Loss = 8.2414e-02, PNorm = 137.1381, GNorm = 0.3711, lr_0 = 7.2409e-04
Loss = 9.0492e-02, PNorm = 137.2235, GNorm = 0.6562, lr_0 = 7.2359e-04
Loss = 9.7598e-02, PNorm = 137.3056, GNorm = 0.8536, lr_0 = 7.2310e-04
Loss = 9.1027e-02, PNorm = 137.3928, GNorm = 0.7865, lr_0 = 7.2260e-04
Loss = 9.6417e-02, PNorm = 137.4783, GNorm = 0.4758, lr_0 = 7.2211e-04
Loss = 9.0230e-02, PNorm = 137.5600, GNorm = 0.7597, lr_0 = 7.2161e-04
Loss = 1.1392e-01, PNorm = 137.6387, GNorm = 0.5105, lr_0 = 7.2112e-04
Loss = 9.2675e-02, PNorm = 137.7276, GNorm = 0.7012, lr_0 = 7.2062e-04
Loss = 1.0673e-01, PNorm = 137.8110, GNorm = 0.6787, lr_0 = 7.2013e-04
Loss = 1.1456e-01, PNorm = 137.8978, GNorm = 0.8347, lr_0 = 7.1964e-04
Validation mae = 0.123694
Epoch 6
Loss = 7.1246e-02, PNorm = 137.9738, GNorm = 0.4075, lr_0 = 7.1914e-04
Loss = 6.5195e-02, PNorm = 138.0372, GNorm = 0.9104, lr_0 = 7.1865e-04
Loss = 6.8791e-02, PNorm = 138.1014, GNorm = 0.8691, lr_0 = 7.1816e-04
Loss = 7.1533e-02, PNorm = 138.1650, GNorm = 0.3715, lr_0 = 7.1767e-04
Loss = 7.5671e-02, PNorm = 138.2253, GNorm = 1.0560, lr_0 = 7.1717e-04
Loss = 7.1809e-02, PNorm = 138.2810, GNorm = 0.8402, lr_0 = 7.1668e-04
Loss = 9.4529e-02, PNorm = 138.3406, GNorm = 0.4421, lr_0 = 7.1619e-04
Loss = 7.4071e-02, PNorm = 138.4020, GNorm = 0.5355, lr_0 = 7.1570e-04
Loss = 6.7457e-02, PNorm = 138.4544, GNorm = 0.5321, lr_0 = 7.1521e-04
Loss = 8.5133e-02, PNorm = 138.5145, GNorm = 1.6226, lr_0 = 7.1472e-04
Loss = 6.2174e-02, PNorm = 138.5687, GNorm = 0.4045, lr_0 = 7.1423e-04
Loss = 5.6605e-02, PNorm = 138.6183, GNorm = 0.3663, lr_0 = 7.1374e-04
Loss = 5.5420e-02, PNorm = 138.6726, GNorm = 0.7927, lr_0 = 7.1325e-04
Loss = 6.4477e-02, PNorm = 138.7232, GNorm = 0.7523, lr_0 = 7.1277e-04
Loss = 7.3715e-02, PNorm = 138.7895, GNorm = 1.1166, lr_0 = 7.1228e-04
Loss = 5.6643e-02, PNorm = 138.8441, GNorm = 0.3918, lr_0 = 7.1179e-04
Loss = 6.2624e-02, PNorm = 138.8988, GNorm = 0.5602, lr_0 = 7.1130e-04
Loss = 6.9175e-02, PNorm = 138.9446, GNorm = 1.5601, lr_0 = 7.1081e-04
Loss = 6.9897e-02, PNorm = 138.9969, GNorm = 2.0107, lr_0 = 7.1033e-04
Loss = 6.3275e-02, PNorm = 139.0594, GNorm = 0.4860, lr_0 = 7.0984e-04
Loss = 5.5709e-02, PNorm = 139.1161, GNorm = 0.5346, lr_0 = 7.0935e-04
Loss = 5.2907e-02, PNorm = 139.1721, GNorm = 0.3267, lr_0 = 7.0887e-04
Loss = 5.6462e-02, PNorm = 139.2205, GNorm = 0.6940, lr_0 = 7.0838e-04
Loss = 6.6700e-02, PNorm = 139.2746, GNorm = 0.5358, lr_0 = 7.0790e-04
Loss = 5.4175e-02, PNorm = 139.3298, GNorm = 0.7989, lr_0 = 7.0741e-04
Loss = 5.8011e-02, PNorm = 139.3829, GNorm = 0.7019, lr_0 = 7.0693e-04
Loss = 6.2157e-02, PNorm = 139.4327, GNorm = 0.4143, lr_0 = 7.0644e-04
Loss = 6.5373e-02, PNorm = 139.4838, GNorm = 0.5576, lr_0 = 7.0596e-04
Loss = 6.2526e-02, PNorm = 139.5369, GNorm = 0.5370, lr_0 = 7.0548e-04
Loss = 6.1627e-02, PNorm = 139.5889, GNorm = 0.6874, lr_0 = 7.0499e-04
Loss = 6.0700e-02, PNorm = 139.6431, GNorm = 0.3352, lr_0 = 7.0451e-04
Loss = 6.4038e-02, PNorm = 139.6965, GNorm = 0.5376, lr_0 = 7.0403e-04
Loss = 5.9088e-02, PNorm = 139.7566, GNorm = 0.4218, lr_0 = 7.0354e-04
Loss = 5.0519e-02, PNorm = 139.8091, GNorm = 0.3662, lr_0 = 7.0306e-04
Loss = 5.8451e-02, PNorm = 139.8551, GNorm = 0.4728, lr_0 = 7.0258e-04
Loss = 6.3641e-02, PNorm = 139.9085, GNorm = 0.9445, lr_0 = 7.0210e-04
Loss = 6.0934e-02, PNorm = 139.9663, GNorm = 0.4867, lr_0 = 7.0162e-04
Loss = 6.9744e-02, PNorm = 140.0295, GNorm = 0.4275, lr_0 = 7.0114e-04
Loss = 6.2105e-02, PNorm = 140.0849, GNorm = 0.4629, lr_0 = 7.0066e-04
Loss = 6.2757e-02, PNorm = 140.1444, GNorm = 0.5840, lr_0 = 7.0018e-04
Loss = 5.1676e-02, PNorm = 140.2085, GNorm = 0.4484, lr_0 = 6.9970e-04
Loss = 5.8428e-02, PNorm = 140.2669, GNorm = 0.3731, lr_0 = 6.9922e-04
Loss = 6.1350e-02, PNorm = 140.3292, GNorm = 0.6157, lr_0 = 6.9874e-04
Loss = 6.0021e-02, PNorm = 140.3831, GNorm = 0.9564, lr_0 = 6.9826e-04
Loss = 6.1625e-02, PNorm = 140.4406, GNorm = 0.4934, lr_0 = 6.9778e-04
Loss = 5.7714e-02, PNorm = 140.5009, GNorm = 0.9182, lr_0 = 6.9730e-04
Loss = 5.4937e-02, PNorm = 140.5528, GNorm = 0.4911, lr_0 = 6.9683e-04
Loss = 6.2263e-02, PNorm = 140.6070, GNorm = 0.3671, lr_0 = 6.9635e-04
Loss = 5.6925e-02, PNorm = 140.6567, GNorm = 0.4850, lr_0 = 6.9587e-04
Loss = 6.5677e-02, PNorm = 140.7143, GNorm = 0.4914, lr_0 = 6.9540e-04
Loss = 4.9225e-02, PNorm = 140.7634, GNorm = 0.5345, lr_0 = 6.9492e-04
Loss = 6.0499e-02, PNorm = 140.8180, GNorm = 0.6139, lr_0 = 6.9444e-04
Loss = 5.8601e-02, PNorm = 140.8725, GNorm = 0.4258, lr_0 = 6.9397e-04
Loss = 6.8839e-02, PNorm = 140.9318, GNorm = 0.4824, lr_0 = 6.9349e-04
Loss = 6.8687e-02, PNorm = 140.9899, GNorm = 1.1698, lr_0 = 6.9302e-04
Loss = 5.9443e-02, PNorm = 141.0548, GNorm = 0.4543, lr_0 = 6.9254e-04
Loss = 6.3714e-02, PNorm = 141.1151, GNorm = 0.6359, lr_0 = 6.9207e-04
Loss = 6.8878e-02, PNorm = 141.1755, GNorm = 0.6874, lr_0 = 6.9159e-04
Loss = 7.0830e-02, PNorm = 141.2382, GNorm = 0.3904, lr_0 = 6.9112e-04
Loss = 5.7678e-02, PNorm = 141.2982, GNorm = 0.8732, lr_0 = 6.9065e-04
Loss = 4.7539e-02, PNorm = 141.3547, GNorm = 0.3997, lr_0 = 6.9017e-04
Loss = 7.3620e-02, PNorm = 141.4051, GNorm = 0.6428, lr_0 = 6.8970e-04
Loss = 6.3051e-02, PNorm = 141.4619, GNorm = 0.6433, lr_0 = 6.8923e-04
Loss = 5.5389e-02, PNorm = 141.5182, GNorm = 0.4562, lr_0 = 6.8876e-04
Loss = 9.7446e-02, PNorm = 141.5802, GNorm = 0.8945, lr_0 = 6.8828e-04
Loss = 6.1136e-02, PNorm = 141.6411, GNorm = 0.7682, lr_0 = 6.8781e-04
Loss = 6.4650e-02, PNorm = 141.7027, GNorm = 0.5483, lr_0 = 6.8734e-04
Loss = 8.9675e-02, PNorm = 141.7652, GNorm = 0.5217, lr_0 = 6.8687e-04
Loss = 6.6879e-02, PNorm = 141.8177, GNorm = 0.5044, lr_0 = 6.8640e-04
Loss = 6.4781e-02, PNorm = 141.8811, GNorm = 0.4148, lr_0 = 6.8593e-04
Loss = 8.1164e-02, PNorm = 141.9485, GNorm = 0.4573, lr_0 = 6.8546e-04
Loss = 7.8397e-02, PNorm = 142.0191, GNorm = 0.4558, lr_0 = 6.8499e-04
Loss = 7.2179e-02, PNorm = 142.0924, GNorm = 0.6072, lr_0 = 6.8452e-04
Loss = 6.2544e-02, PNorm = 142.1574, GNorm = 0.5970, lr_0 = 6.8405e-04
Loss = 6.7797e-02, PNorm = 142.2260, GNorm = 1.0030, lr_0 = 6.8358e-04
Loss = 6.3305e-02, PNorm = 142.2927, GNorm = 0.7738, lr_0 = 6.8312e-04
Loss = 6.4472e-02, PNorm = 142.3579, GNorm = 0.7994, lr_0 = 6.8265e-04
Loss = 7.6215e-02, PNorm = 142.4223, GNorm = 0.4952, lr_0 = 6.8218e-04
Loss = 7.3719e-02, PNorm = 142.4913, GNorm = 0.4021, lr_0 = 6.8171e-04
Loss = 6.4116e-02, PNorm = 142.5656, GNorm = 0.4539, lr_0 = 6.8125e-04
Loss = 7.4967e-02, PNorm = 142.6373, GNorm = 0.5186, lr_0 = 6.8078e-04
Loss = 7.1701e-02, PNorm = 142.7074, GNorm = 0.6481, lr_0 = 6.8031e-04
Loss = 5.6145e-02, PNorm = 142.7768, GNorm = 0.4174, lr_0 = 6.7985e-04
Loss = 6.1424e-02, PNorm = 142.8388, GNorm = 0.6349, lr_0 = 6.7938e-04
Loss = 5.9862e-02, PNorm = 142.9019, GNorm = 0.4634, lr_0 = 6.7892e-04
Loss = 5.3311e-02, PNorm = 142.9630, GNorm = 0.5443, lr_0 = 6.7845e-04
Loss = 7.3540e-02, PNorm = 143.0256, GNorm = 0.6017, lr_0 = 6.7799e-04
Loss = 6.4830e-02, PNorm = 143.0848, GNorm = 0.7344, lr_0 = 6.7752e-04
Loss = 5.5816e-02, PNorm = 143.1523, GNorm = 0.5135, lr_0 = 6.7706e-04
Loss = 5.7262e-02, PNorm = 143.2140, GNorm = 0.5721, lr_0 = 6.7659e-04
Loss = 5.7584e-02, PNorm = 143.2710, GNorm = 0.3544, lr_0 = 6.7613e-04
Loss = 6.5754e-02, PNorm = 143.3296, GNorm = 0.7036, lr_0 = 6.7567e-04
Loss = 6.1370e-02, PNorm = 143.3955, GNorm = 0.5567, lr_0 = 6.7520e-04
Loss = 7.1194e-02, PNorm = 143.4570, GNorm = 0.5697, lr_0 = 6.7474e-04
Loss = 8.1563e-02, PNorm = 143.5219, GNorm = 0.5686, lr_0 = 6.7428e-04
Loss = 6.1102e-02, PNorm = 143.5850, GNorm = 0.3517, lr_0 = 6.7382e-04
Loss = 6.9956e-02, PNorm = 143.6482, GNorm = 0.4798, lr_0 = 6.7335e-04
Loss = 7.1485e-02, PNorm = 143.7198, GNorm = 0.4959, lr_0 = 6.7289e-04
Loss = 8.8546e-02, PNorm = 143.7858, GNorm = 0.9861, lr_0 = 6.7243e-04
Loss = 7.3111e-02, PNorm = 143.8683, GNorm = 0.5907, lr_0 = 6.7197e-04
Loss = 7.6464e-02, PNorm = 143.9497, GNorm = 0.5622, lr_0 = 6.7151e-04
Loss = 6.8489e-02, PNorm = 144.0272, GNorm = 0.5864, lr_0 = 6.7105e-04
Loss = 6.3186e-02, PNorm = 144.1022, GNorm = 0.4668, lr_0 = 6.7059e-04
Loss = 8.3882e-02, PNorm = 144.1638, GNorm = 0.6136, lr_0 = 6.7013e-04
Loss = 8.1587e-02, PNorm = 144.2383, GNorm = 1.0817, lr_0 = 6.6967e-04
Loss = 7.8658e-02, PNorm = 144.3211, GNorm = 0.4548, lr_0 = 6.6921e-04
Loss = 6.2976e-02, PNorm = 144.3963, GNorm = 0.4576, lr_0 = 6.6876e-04
Loss = 6.2548e-02, PNorm = 144.4736, GNorm = 0.4727, lr_0 = 6.6830e-04
Loss = 7.6677e-02, PNorm = 144.5405, GNorm = 0.6249, lr_0 = 6.6784e-04
Loss = 6.7892e-02, PNorm = 144.6185, GNorm = 0.4963, lr_0 = 6.6738e-04
Loss = 7.5677e-02, PNorm = 144.6911, GNorm = 2.3106, lr_0 = 6.6693e-04
Loss = 6.4602e-02, PNorm = 144.7577, GNorm = 0.8198, lr_0 = 6.6647e-04
Loss = 6.3453e-02, PNorm = 144.8302, GNorm = 0.5149, lr_0 = 6.6601e-04
Loss = 7.5716e-02, PNorm = 144.8960, GNorm = 0.5403, lr_0 = 6.6556e-04
Loss = 7.1597e-02, PNorm = 144.9671, GNorm = 0.4593, lr_0 = 6.6510e-04
Loss = 9.5107e-02, PNorm = 145.0446, GNorm = 0.6624, lr_0 = 6.6464e-04
Loss = 6.6845e-02, PNorm = 145.1260, GNorm = 0.7042, lr_0 = 6.6419e-04
Loss = 7.8774e-02, PNorm = 145.2043, GNorm = 0.6230, lr_0 = 6.6373e-04
Loss = 6.9377e-02, PNorm = 145.2819, GNorm = 0.7285, lr_0 = 6.6328e-04
Loss = 8.1302e-02, PNorm = 145.3564, GNorm = 0.6799, lr_0 = 6.6282e-04
Validation mae = 0.124406
Epoch 7
Loss = 5.4280e-02, PNorm = 145.4263, GNorm = 1.0504, lr_0 = 6.6237e-04
Loss = 5.2153e-02, PNorm = 145.4787, GNorm = 0.9576, lr_0 = 6.6192e-04
Loss = 5.3012e-02, PNorm = 145.5259, GNorm = 0.4658, lr_0 = 6.6146e-04
Loss = 5.3278e-02, PNorm = 145.5680, GNorm = 0.4675, lr_0 = 6.6101e-04
Loss = 5.1586e-02, PNorm = 145.6199, GNorm = 0.3556, lr_0 = 6.6056e-04
Loss = 4.7410e-02, PNorm = 145.6656, GNorm = 0.4608, lr_0 = 6.6011e-04
Loss = 7.7485e-02, PNorm = 145.7143, GNorm = 0.8019, lr_0 = 6.5965e-04
Loss = 4.8126e-02, PNorm = 145.7695, GNorm = 0.6085, lr_0 = 6.5920e-04
Loss = 4.6964e-02, PNorm = 145.8152, GNorm = 0.3954, lr_0 = 6.5875e-04
Loss = 4.1176e-02, PNorm = 145.8657, GNorm = 0.7012, lr_0 = 6.5830e-04
Loss = 5.0246e-02, PNorm = 145.9112, GNorm = 0.6410, lr_0 = 6.5785e-04
Loss = 5.0735e-02, PNorm = 145.9563, GNorm = 0.3698, lr_0 = 6.5740e-04
Loss = 4.0491e-02, PNorm = 146.0042, GNorm = 0.4286, lr_0 = 6.5695e-04
Loss = 4.5884e-02, PNorm = 146.0432, GNorm = 0.4701, lr_0 = 6.5650e-04
Loss = 5.0220e-02, PNorm = 146.0821, GNorm = 0.4837, lr_0 = 6.5605e-04
Loss = 4.8764e-02, PNorm = 146.1170, GNorm = 0.3720, lr_0 = 6.5560e-04
Loss = 4.7990e-02, PNorm = 146.1592, GNorm = 0.4721, lr_0 = 6.5515e-04
Loss = 4.9328e-02, PNorm = 146.1990, GNorm = 0.5162, lr_0 = 6.5470e-04
Loss = 4.5972e-02, PNorm = 146.2437, GNorm = 0.6071, lr_0 = 6.5425e-04
Loss = 6.1291e-02, PNorm = 146.2874, GNorm = 0.3684, lr_0 = 6.5380e-04
Loss = 4.3873e-02, PNorm = 146.3385, GNorm = 0.3831, lr_0 = 6.5335e-04
Loss = 4.3548e-02, PNorm = 146.3810, GNorm = 0.5858, lr_0 = 6.5291e-04
Loss = 4.1313e-02, PNorm = 146.4297, GNorm = 0.3586, lr_0 = 6.5246e-04
Loss = 4.4722e-02, PNorm = 146.4773, GNorm = 0.5228, lr_0 = 6.5201e-04
Loss = 4.2705e-02, PNorm = 146.5231, GNorm = 0.3316, lr_0 = 6.5157e-04
Loss = 5.1034e-02, PNorm = 146.5658, GNorm = 0.3999, lr_0 = 6.5112e-04
Loss = 4.3933e-02, PNorm = 146.6162, GNorm = 0.5960, lr_0 = 6.5067e-04
Loss = 4.0049e-02, PNorm = 146.6597, GNorm = 0.6079, lr_0 = 6.5023e-04
Loss = 5.1521e-02, PNorm = 146.7077, GNorm = 0.4789, lr_0 = 6.4978e-04
Loss = 5.4623e-02, PNorm = 146.7554, GNorm = 0.4826, lr_0 = 6.4934e-04
Loss = 6.2049e-02, PNorm = 146.8085, GNorm = 1.0660, lr_0 = 6.4889e-04
Loss = 4.6487e-02, PNorm = 146.8632, GNorm = 0.6423, lr_0 = 6.4845e-04
Loss = 5.4552e-02, PNorm = 146.9092, GNorm = 0.5302, lr_0 = 6.4800e-04
Loss = 5.2561e-02, PNorm = 146.9514, GNorm = 0.5146, lr_0 = 6.4756e-04
Loss = 5.9077e-02, PNorm = 146.9954, GNorm = 0.6666, lr_0 = 6.4712e-04
Loss = 4.8002e-02, PNorm = 147.0456, GNorm = 0.8116, lr_0 = 6.4667e-04
Loss = 4.2822e-02, PNorm = 147.0906, GNorm = 0.3381, lr_0 = 6.4623e-04
Loss = 4.4628e-02, PNorm = 147.1420, GNorm = 0.4056, lr_0 = 6.4579e-04
Loss = 4.6120e-02, PNorm = 147.1969, GNorm = 0.4056, lr_0 = 6.4534e-04
Loss = 3.7319e-02, PNorm = 147.2426, GNorm = 0.4142, lr_0 = 6.4490e-04
Loss = 5.8192e-02, PNorm = 147.2898, GNorm = 0.4237, lr_0 = 6.4446e-04
Loss = 4.3366e-02, PNorm = 147.3381, GNorm = 0.4272, lr_0 = 6.4402e-04
Loss = 4.4286e-02, PNorm = 147.3868, GNorm = 0.4689, lr_0 = 6.4358e-04
Loss = 6.3921e-02, PNorm = 147.4433, GNorm = 0.4834, lr_0 = 6.4314e-04
Loss = 5.7680e-02, PNorm = 147.5004, GNorm = 0.4642, lr_0 = 6.4270e-04
Loss = 5.3116e-02, PNorm = 147.5568, GNorm = 0.6196, lr_0 = 6.4226e-04
Loss = 5.2662e-02, PNorm = 147.6114, GNorm = 0.6299, lr_0 = 6.4182e-04
Loss = 4.3713e-02, PNorm = 147.6623, GNorm = 0.4476, lr_0 = 6.4138e-04
Loss = 6.8860e-02, PNorm = 147.7127, GNorm = 0.6547, lr_0 = 6.4094e-04
Loss = 5.2845e-02, PNorm = 147.7758, GNorm = 0.4090, lr_0 = 6.4050e-04
Loss = 4.8539e-02, PNorm = 147.8331, GNorm = 0.6005, lr_0 = 6.4006e-04
Loss = 5.2970e-02, PNorm = 147.8891, GNorm = 0.4189, lr_0 = 6.3962e-04
Loss = 5.5003e-02, PNorm = 147.9462, GNorm = 0.5598, lr_0 = 6.3918e-04
Loss = 4.6303e-02, PNorm = 147.9976, GNorm = 0.2910, lr_0 = 6.3874e-04
Loss = 5.3664e-02, PNorm = 148.0465, GNorm = 0.5803, lr_0 = 6.3831e-04
Loss = 4.3035e-02, PNorm = 148.0920, GNorm = 0.3449, lr_0 = 6.3787e-04
Loss = 4.6001e-02, PNorm = 148.1454, GNorm = 0.4575, lr_0 = 6.3743e-04
Loss = 4.9535e-02, PNorm = 148.1975, GNorm = 0.4249, lr_0 = 6.3700e-04
Loss = 5.7636e-02, PNorm = 148.2548, GNorm = 1.4712, lr_0 = 6.3656e-04
Loss = 5.0225e-02, PNorm = 148.3089, GNorm = 0.4891, lr_0 = 6.3612e-04
Loss = 5.8526e-02, PNorm = 148.3672, GNorm = 0.3187, lr_0 = 6.3569e-04
Loss = 4.5470e-02, PNorm = 148.4292, GNorm = 0.5906, lr_0 = 6.3525e-04
Loss = 6.3218e-02, PNorm = 148.4839, GNorm = 1.1527, lr_0 = 6.3482e-04
Loss = 5.1365e-02, PNorm = 148.5412, GNorm = 0.4845, lr_0 = 6.3438e-04
Loss = 5.5489e-02, PNorm = 148.6014, GNorm = 0.4478, lr_0 = 6.3395e-04
Loss = 4.7779e-02, PNorm = 148.6573, GNorm = 1.0059, lr_0 = 6.3351e-04
Loss = 5.1609e-02, PNorm = 148.7102, GNorm = 0.4419, lr_0 = 6.3308e-04
Loss = 5.2102e-02, PNorm = 148.7681, GNorm = 0.5820, lr_0 = 6.3265e-04
Loss = 4.8079e-02, PNorm = 148.8223, GNorm = 0.6993, lr_0 = 6.3221e-04
Loss = 4.7444e-02, PNorm = 148.8699, GNorm = 0.5328, lr_0 = 6.3178e-04
Loss = 4.9996e-02, PNorm = 148.9230, GNorm = 0.4023, lr_0 = 6.3135e-04
Loss = 5.4400e-02, PNorm = 148.9771, GNorm = 0.3949, lr_0 = 6.3091e-04
Loss = 5.5201e-02, PNorm = 149.0326, GNorm = 0.4046, lr_0 = 6.3048e-04
Loss = 4.8916e-02, PNorm = 149.0847, GNorm = 0.8135, lr_0 = 6.3005e-04
Loss = 5.0363e-02, PNorm = 149.1406, GNorm = 0.4712, lr_0 = 6.2962e-04
Loss = 4.7765e-02, PNorm = 149.1931, GNorm = 0.3348, lr_0 = 6.2919e-04
Loss = 4.3916e-02, PNorm = 149.2417, GNorm = 0.7370, lr_0 = 6.2876e-04
Loss = 4.0614e-02, PNorm = 149.2959, GNorm = 0.4705, lr_0 = 6.2833e-04
Loss = 4.6998e-02, PNorm = 149.3492, GNorm = 0.5085, lr_0 = 6.2789e-04
Loss = 4.9493e-02, PNorm = 149.4001, GNorm = 0.6923, lr_0 = 6.2746e-04
Loss = 5.7491e-02, PNorm = 149.4584, GNorm = 0.4910, lr_0 = 6.2703e-04
Loss = 5.3448e-02, PNorm = 149.5213, GNorm = 0.5442, lr_0 = 6.2661e-04
Loss = 4.5332e-02, PNorm = 149.5815, GNorm = 0.3708, lr_0 = 6.2618e-04
Loss = 5.5947e-02, PNorm = 149.6325, GNorm = 0.4578, lr_0 = 6.2575e-04
Loss = 5.3439e-02, PNorm = 149.6880, GNorm = 0.6390, lr_0 = 6.2532e-04
Loss = 6.4350e-02, PNorm = 149.7530, GNorm = 0.3067, lr_0 = 6.2489e-04
Loss = 4.7441e-02, PNorm = 149.8148, GNorm = 0.7232, lr_0 = 6.2446e-04
Loss = 4.6952e-02, PNorm = 149.8658, GNorm = 0.7950, lr_0 = 6.2403e-04
Loss = 4.9037e-02, PNorm = 149.9176, GNorm = 0.5583, lr_0 = 6.2361e-04
Loss = 5.0930e-02, PNorm = 149.9723, GNorm = 0.7476, lr_0 = 6.2318e-04
Loss = 5.0651e-02, PNorm = 150.0179, GNorm = 0.6022, lr_0 = 6.2275e-04
Loss = 5.0161e-02, PNorm = 150.0704, GNorm = 0.4014, lr_0 = 6.2233e-04
Loss = 5.5564e-02, PNorm = 150.1194, GNorm = 0.5248, lr_0 = 6.2190e-04
Loss = 6.3067e-02, PNorm = 150.1773, GNorm = 0.3924, lr_0 = 6.2147e-04
Loss = 4.9848e-02, PNorm = 150.2347, GNorm = 0.3599, lr_0 = 6.2105e-04
Loss = 5.1476e-02, PNorm = 150.2957, GNorm = 0.4490, lr_0 = 6.2062e-04
Loss = 4.4302e-02, PNorm = 150.3536, GNorm = 0.4017, lr_0 = 6.2020e-04
Loss = 4.4168e-02, PNorm = 150.4056, GNorm = 0.6020, lr_0 = 6.1977e-04
Loss = 5.1062e-02, PNorm = 150.4564, GNorm = 0.6947, lr_0 = 6.1935e-04
Loss = 4.8729e-02, PNorm = 150.5085, GNorm = 0.3281, lr_0 = 6.1892e-04
Loss = 5.1328e-02, PNorm = 150.5601, GNorm = 0.4440, lr_0 = 6.1850e-04
Loss = 5.8030e-02, PNorm = 150.6133, GNorm = 0.8928, lr_0 = 6.1808e-04
Loss = 4.8207e-02, PNorm = 150.6703, GNorm = 0.4276, lr_0 = 6.1765e-04
Loss = 5.2494e-02, PNorm = 150.7272, GNorm = 0.5993, lr_0 = 6.1723e-04
Loss = 5.0311e-02, PNorm = 150.7847, GNorm = 0.8728, lr_0 = 6.1681e-04
Loss = 5.2866e-02, PNorm = 150.8423, GNorm = 0.3236, lr_0 = 6.1638e-04
Loss = 4.9594e-02, PNorm = 150.8969, GNorm = 0.4150, lr_0 = 6.1596e-04
Loss = 6.2109e-02, PNorm = 150.9582, GNorm = 0.6369, lr_0 = 6.1554e-04
Loss = 5.0602e-02, PNorm = 151.0184, GNorm = 0.3717, lr_0 = 6.1512e-04
Loss = 5.1372e-02, PNorm = 151.0741, GNorm = 0.4187, lr_0 = 6.1470e-04
Loss = 5.5414e-02, PNorm = 151.1360, GNorm = 0.5056, lr_0 = 6.1428e-04
Loss = 5.1708e-02, PNorm = 151.1942, GNorm = 0.5236, lr_0 = 6.1385e-04
Loss = 4.8958e-02, PNorm = 151.2578, GNorm = 0.3493, lr_0 = 6.1343e-04
Loss = 5.3319e-02, PNorm = 151.3124, GNorm = 0.3194, lr_0 = 6.1301e-04
Loss = 5.1772e-02, PNorm = 151.3702, GNorm = 0.5190, lr_0 = 6.1259e-04
Loss = 4.6260e-02, PNorm = 151.4255, GNorm = 0.3727, lr_0 = 6.1217e-04
Loss = 5.0309e-02, PNorm = 151.4747, GNorm = 0.6087, lr_0 = 6.1175e-04
Loss = 6.5515e-02, PNorm = 151.5335, GNorm = 0.5983, lr_0 = 6.1134e-04
Loss = 6.1277e-02, PNorm = 151.5943, GNorm = 0.5217, lr_0 = 6.1092e-04
Loss = 5.0180e-02, PNorm = 151.6599, GNorm = 0.7316, lr_0 = 6.1050e-04
Validation mae = 0.123147
Epoch 8
Loss = 5.3832e-02, PNorm = 151.7125, GNorm = 0.3963, lr_0 = 6.1008e-04
Loss = 3.4518e-02, PNorm = 151.7593, GNorm = 0.2315, lr_0 = 6.0966e-04
Loss = 3.9340e-02, PNorm = 151.8032, GNorm = 0.3856, lr_0 = 6.0924e-04
Loss = 6.2152e-02, PNorm = 151.8465, GNorm = 0.5248, lr_0 = 6.0883e-04
Loss = 4.9754e-02, PNorm = 151.8895, GNorm = 0.3421, lr_0 = 6.0841e-04
Loss = 4.4442e-02, PNorm = 151.9323, GNorm = 0.5620, lr_0 = 6.0799e-04
Loss = 3.5947e-02, PNorm = 151.9726, GNorm = 0.6616, lr_0 = 6.0758e-04
Loss = 4.3839e-02, PNorm = 152.0122, GNorm = 0.3865, lr_0 = 6.0716e-04
Loss = 3.7113e-02, PNorm = 152.0513, GNorm = 0.5885, lr_0 = 6.0674e-04
Loss = 3.0525e-02, PNorm = 152.0907, GNorm = 0.4954, lr_0 = 6.0633e-04
Loss = 3.7425e-02, PNorm = 152.1245, GNorm = 0.3756, lr_0 = 6.0591e-04
Loss = 4.0645e-02, PNorm = 152.1624, GNorm = 0.2731, lr_0 = 6.0550e-04
Loss = 4.3699e-02, PNorm = 152.2029, GNorm = 0.5204, lr_0 = 6.0508e-04
Loss = 3.5766e-02, PNorm = 152.2460, GNorm = 0.7884, lr_0 = 6.0467e-04
Loss = 4.1014e-02, PNorm = 152.2843, GNorm = 0.3530, lr_0 = 6.0425e-04
Loss = 4.2334e-02, PNorm = 152.3209, GNorm = 0.3885, lr_0 = 6.0384e-04
Loss = 3.1475e-02, PNorm = 152.3531, GNorm = 0.2931, lr_0 = 6.0343e-04
Loss = 3.5059e-02, PNorm = 152.3866, GNorm = 0.3135, lr_0 = 6.0301e-04
Loss = 4.1578e-02, PNorm = 152.4319, GNorm = 0.7516, lr_0 = 6.0260e-04
Loss = 3.6843e-02, PNorm = 152.4698, GNorm = 0.4712, lr_0 = 6.0219e-04
Loss = 3.6329e-02, PNorm = 152.5060, GNorm = 0.3027, lr_0 = 6.0178e-04
Loss = 3.9378e-02, PNorm = 152.5465, GNorm = 0.4141, lr_0 = 6.0136e-04
Loss = 3.7118e-02, PNorm = 152.5873, GNorm = 0.3768, lr_0 = 6.0095e-04
Loss = 3.9279e-02, PNorm = 152.6185, GNorm = 0.3397, lr_0 = 6.0054e-04
Loss = 3.3307e-02, PNorm = 152.6603, GNorm = 0.3891, lr_0 = 6.0013e-04
Loss = 4.7265e-02, PNorm = 152.6946, GNorm = 0.4299, lr_0 = 5.9972e-04
Loss = 3.9749e-02, PNorm = 152.7331, GNorm = 0.3864, lr_0 = 5.9931e-04
Loss = 3.8722e-02, PNorm = 152.7695, GNorm = 0.6123, lr_0 = 5.9890e-04
Loss = 4.1587e-02, PNorm = 152.8094, GNorm = 0.4203, lr_0 = 5.9849e-04
Loss = 4.4050e-02, PNorm = 152.8512, GNorm = 0.3837, lr_0 = 5.9808e-04
Loss = 4.6004e-02, PNorm = 152.8838, GNorm = 0.4655, lr_0 = 5.9767e-04
Loss = 3.8623e-02, PNorm = 152.9287, GNorm = 1.0348, lr_0 = 5.9726e-04
Loss = 4.1239e-02, PNorm = 152.9622, GNorm = 0.8210, lr_0 = 5.9685e-04
Loss = 3.6538e-02, PNorm = 153.0063, GNorm = 0.6511, lr_0 = 5.9644e-04
Loss = 3.7627e-02, PNorm = 153.0444, GNorm = 0.3356, lr_0 = 5.9603e-04
Loss = 3.7860e-02, PNorm = 153.0852, GNorm = 0.6329, lr_0 = 5.9562e-04
Loss = 3.6685e-02, PNorm = 153.1200, GNorm = 0.4404, lr_0 = 5.9521e-04
Loss = 3.8320e-02, PNorm = 153.1598, GNorm = 0.8432, lr_0 = 5.9481e-04
Loss = 3.7363e-02, PNorm = 153.2003, GNorm = 0.4405, lr_0 = 5.9440e-04
Loss = 3.4652e-02, PNorm = 153.2355, GNorm = 0.3081, lr_0 = 5.9399e-04
Loss = 3.5481e-02, PNorm = 153.2773, GNorm = 0.5357, lr_0 = 5.9358e-04
Loss = 5.4629e-02, PNorm = 153.3124, GNorm = 0.4708, lr_0 = 5.9318e-04
Loss = 3.9087e-02, PNorm = 153.3578, GNorm = 0.2533, lr_0 = 5.9277e-04
Loss = 4.8028e-02, PNorm = 153.3952, GNorm = 0.4842, lr_0 = 5.9236e-04
Loss = 4.0444e-02, PNorm = 153.4439, GNorm = 0.9049, lr_0 = 5.9196e-04
Loss = 3.9561e-02, PNorm = 153.4910, GNorm = 0.8072, lr_0 = 5.9155e-04
Loss = 3.9963e-02, PNorm = 153.5315, GNorm = 0.4823, lr_0 = 5.9115e-04
Loss = 3.4435e-02, PNorm = 153.5769, GNorm = 1.0774, lr_0 = 5.9074e-04
Loss = 4.4624e-02, PNorm = 153.6125, GNorm = 0.3971, lr_0 = 5.9034e-04
Loss = 3.9604e-02, PNorm = 153.6544, GNorm = 0.5507, lr_0 = 5.8993e-04
Loss = 4.6613e-02, PNorm = 153.6941, GNorm = 0.2907, lr_0 = 5.8953e-04
Loss = 3.8622e-02, PNorm = 153.7375, GNorm = 0.4263, lr_0 = 5.8913e-04
Loss = 4.9118e-02, PNorm = 153.7851, GNorm = 0.2944, lr_0 = 5.8872e-04
Loss = 4.1212e-02, PNorm = 153.8315, GNorm = 0.2861, lr_0 = 5.8832e-04
Loss = 3.3556e-02, PNorm = 153.8775, GNorm = 0.3652, lr_0 = 5.8792e-04
Loss = 3.2746e-02, PNorm = 153.9240, GNorm = 0.3413, lr_0 = 5.8751e-04
Loss = 3.8381e-02, PNorm = 153.9676, GNorm = 0.5973, lr_0 = 5.8711e-04
Loss = 3.8065e-02, PNorm = 154.0095, GNorm = 0.5409, lr_0 = 5.8671e-04
Loss = 4.0794e-02, PNorm = 154.0491, GNorm = 0.3898, lr_0 = 5.8631e-04
Loss = 3.4961e-02, PNorm = 154.0918, GNorm = 0.4986, lr_0 = 5.8591e-04
Loss = 4.3310e-02, PNorm = 154.1327, GNorm = 0.4245, lr_0 = 5.8550e-04
Loss = 4.5785e-02, PNorm = 154.1812, GNorm = 0.3569, lr_0 = 5.8510e-04
Loss = 4.7705e-02, PNorm = 154.2265, GNorm = 0.6700, lr_0 = 5.8470e-04
Loss = 3.6841e-02, PNorm = 154.2757, GNorm = 0.3040, lr_0 = 5.8430e-04
Loss = 4.8555e-02, PNorm = 154.3250, GNorm = 0.3563, lr_0 = 5.8390e-04
Loss = 3.3115e-02, PNorm = 154.3687, GNorm = 0.3136, lr_0 = 5.8350e-04
Loss = 4.2359e-02, PNorm = 154.4113, GNorm = 0.7044, lr_0 = 5.8310e-04
Loss = 3.7953e-02, PNorm = 154.4552, GNorm = 0.5517, lr_0 = 5.8270e-04
Loss = 3.2557e-02, PNorm = 154.5006, GNorm = 0.3639, lr_0 = 5.8230e-04
Loss = 3.6848e-02, PNorm = 154.5496, GNorm = 0.6137, lr_0 = 5.8190e-04
Loss = 3.2737e-02, PNorm = 154.5906, GNorm = 0.2378, lr_0 = 5.8151e-04
Loss = 3.7998e-02, PNorm = 154.6295, GNorm = 0.8751, lr_0 = 5.8111e-04
Loss = 3.7383e-02, PNorm = 154.6719, GNorm = 0.4293, lr_0 = 5.8071e-04
Loss = 4.4885e-02, PNorm = 154.7148, GNorm = 0.5839, lr_0 = 5.8031e-04
Loss = 4.8469e-02, PNorm = 154.7673, GNorm = 0.3730, lr_0 = 5.7991e-04
Loss = 3.8969e-02, PNorm = 154.8120, GNorm = 0.8266, lr_0 = 5.7952e-04
Loss = 4.1238e-02, PNorm = 154.8594, GNorm = 0.3374, lr_0 = 5.7912e-04
Loss = 3.8440e-02, PNorm = 154.9078, GNorm = 0.4604, lr_0 = 5.7872e-04
Loss = 4.3515e-02, PNorm = 154.9539, GNorm = 0.4850, lr_0 = 5.7833e-04
Loss = 4.6877e-02, PNorm = 155.0039, GNorm = 0.3213, lr_0 = 5.7793e-04
Loss = 4.6618e-02, PNorm = 155.0504, GNorm = 0.6661, lr_0 = 5.7753e-04
Loss = 4.1273e-02, PNorm = 155.0976, GNorm = 0.6212, lr_0 = 5.7714e-04
Loss = 4.4542e-02, PNorm = 155.1473, GNorm = 0.2936, lr_0 = 5.7674e-04
Loss = 3.9768e-02, PNorm = 155.1971, GNorm = 0.3311, lr_0 = 5.7635e-04
Loss = 3.5918e-02, PNorm = 155.2440, GNorm = 0.4104, lr_0 = 5.7595e-04
Loss = 4.7564e-02, PNorm = 155.2883, GNorm = 0.5008, lr_0 = 5.7556e-04
Loss = 3.6872e-02, PNorm = 155.3360, GNorm = 0.4561, lr_0 = 5.7516e-04
Loss = 4.2427e-02, PNorm = 155.3824, GNorm = 0.4562, lr_0 = 5.7477e-04
Loss = 3.8512e-02, PNorm = 155.4288, GNorm = 0.8129, lr_0 = 5.7438e-04
Loss = 3.7960e-02, PNorm = 155.4782, GNorm = 0.5553, lr_0 = 5.7398e-04
Loss = 4.0012e-02, PNorm = 155.5222, GNorm = 0.3352, lr_0 = 5.7359e-04
Loss = 4.2274e-02, PNorm = 155.5645, GNorm = 0.5782, lr_0 = 5.7320e-04
Loss = 4.5871e-02, PNorm = 155.6100, GNorm = 0.5552, lr_0 = 5.7280e-04
Loss = 4.0396e-02, PNorm = 155.6591, GNorm = 0.6139, lr_0 = 5.7241e-04
Loss = 5.0429e-02, PNorm = 155.7060, GNorm = 0.5677, lr_0 = 5.7202e-04
Loss = 3.7706e-02, PNorm = 155.7576, GNorm = 0.6268, lr_0 = 5.7163e-04
Loss = 3.7736e-02, PNorm = 155.8013, GNorm = 0.4925, lr_0 = 5.7124e-04
Loss = 3.8071e-02, PNorm = 155.8494, GNorm = 0.5463, lr_0 = 5.7084e-04
Loss = 5.8600e-02, PNorm = 155.8991, GNorm = 0.6753, lr_0 = 5.7045e-04
Loss = 4.8136e-02, PNorm = 155.9528, GNorm = 0.4340, lr_0 = 5.7006e-04
Loss = 3.9253e-02, PNorm = 156.0090, GNorm = 0.4921, lr_0 = 5.6967e-04
Loss = 3.3314e-02, PNorm = 156.0571, GNorm = 0.5545, lr_0 = 5.6928e-04
Loss = 3.4314e-02, PNorm = 156.1036, GNorm = 0.2736, lr_0 = 5.6889e-04
Loss = 3.5298e-02, PNorm = 156.1486, GNorm = 0.6926, lr_0 = 5.6850e-04
Loss = 4.3150e-02, PNorm = 156.1983, GNorm = 0.3073, lr_0 = 5.6811e-04
Loss = 4.3268e-02, PNorm = 156.2440, GNorm = 0.4816, lr_0 = 5.6772e-04
Loss = 5.1987e-02, PNorm = 156.2932, GNorm = 0.4120, lr_0 = 5.6733e-04
Loss = 4.8381e-02, PNorm = 156.3458, GNorm = 0.7228, lr_0 = 5.6695e-04
Loss = 3.9908e-02, PNorm = 156.3946, GNorm = 0.3522, lr_0 = 5.6656e-04
Loss = 4.3149e-02, PNorm = 156.4512, GNorm = 0.4432, lr_0 = 5.6617e-04
Loss = 4.0974e-02, PNorm = 156.5030, GNorm = 0.6081, lr_0 = 5.6578e-04
Loss = 4.4823e-02, PNorm = 156.5498, GNorm = 1.1300, lr_0 = 5.6539e-04
Loss = 4.4534e-02, PNorm = 156.6106, GNorm = 0.3741, lr_0 = 5.6501e-04
Loss = 4.1207e-02, PNorm = 156.6649, GNorm = 0.4817, lr_0 = 5.6462e-04
Loss = 4.8417e-02, PNorm = 156.7219, GNorm = 0.3480, lr_0 = 5.6423e-04
Loss = 4.5045e-02, PNorm = 156.7718, GNorm = 0.8631, lr_0 = 5.6385e-04
Loss = 4.2045e-02, PNorm = 156.8264, GNorm = 0.4624, lr_0 = 5.6346e-04
Loss = 3.9896e-02, PNorm = 156.8785, GNorm = 0.3677, lr_0 = 5.6307e-04
Loss = 4.0359e-02, PNorm = 156.9294, GNorm = 0.3001, lr_0 = 5.6269e-04
Loss = 4.4654e-02, PNorm = 156.9784, GNorm = 0.3963, lr_0 = 5.6230e-04
Validation mae = 0.123038
Epoch 9
Loss = 3.6201e-02, PNorm = 157.0161, GNorm = 0.3910, lr_0 = 5.6192e-04
Loss = 3.8282e-02, PNorm = 157.0527, GNorm = 0.4500, lr_0 = 5.6153e-04
Loss = 3.9236e-02, PNorm = 157.0935, GNorm = 0.2977, lr_0 = 5.6115e-04
Loss = 3.4628e-02, PNorm = 157.1296, GNorm = 0.5549, lr_0 = 5.6076e-04
Loss = 3.1563e-02, PNorm = 157.1616, GNorm = 0.3518, lr_0 = 5.6038e-04
Loss = 3.7317e-02, PNorm = 157.1923, GNorm = 0.2779, lr_0 = 5.6000e-04
Loss = 4.3188e-02, PNorm = 157.2233, GNorm = 0.4510, lr_0 = 5.5961e-04
Loss = 2.8300e-02, PNorm = 157.2540, GNorm = 0.3420, lr_0 = 5.5923e-04
Loss = 3.4632e-02, PNorm = 157.2890, GNorm = 0.3086, lr_0 = 5.5885e-04
Loss = 3.7634e-02, PNorm = 157.3210, GNorm = 0.3413, lr_0 = 5.5846e-04
Loss = 3.0464e-02, PNorm = 157.3564, GNorm = 0.4023, lr_0 = 5.5808e-04
Loss = 2.8141e-02, PNorm = 157.3852, GNorm = 0.5458, lr_0 = 5.5770e-04
Loss = 3.3555e-02, PNorm = 157.4181, GNorm = 0.3132, lr_0 = 5.5732e-04
Loss = 2.8424e-02, PNorm = 157.4479, GNorm = 0.3368, lr_0 = 5.5693e-04
Loss = 3.2022e-02, PNorm = 157.4803, GNorm = 0.3523, lr_0 = 5.5655e-04
Loss = 3.8628e-02, PNorm = 157.5122, GNorm = 0.2118, lr_0 = 5.5617e-04
Loss = 2.5950e-02, PNorm = 157.5411, GNorm = 0.2807, lr_0 = 5.5579e-04
Loss = 3.0882e-02, PNorm = 157.5705, GNorm = 0.4199, lr_0 = 5.5541e-04
Loss = 2.8358e-02, PNorm = 157.6027, GNorm = 0.3967, lr_0 = 5.5503e-04
Loss = 3.0311e-02, PNorm = 157.6356, GNorm = 0.5372, lr_0 = 5.5465e-04
Loss = 2.9537e-02, PNorm = 157.6680, GNorm = 0.4054, lr_0 = 5.5427e-04
Loss = 3.2310e-02, PNorm = 157.7031, GNorm = 0.5581, lr_0 = 5.5389e-04
Loss = 5.5524e-02, PNorm = 157.7373, GNorm = 0.4093, lr_0 = 5.5351e-04
Loss = 2.8570e-02, PNorm = 157.7783, GNorm = 0.2371, lr_0 = 5.5313e-04
Loss = 3.7636e-02, PNorm = 157.8123, GNorm = 0.2624, lr_0 = 5.5275e-04
Loss = 3.5277e-02, PNorm = 157.8412, GNorm = 0.4343, lr_0 = 5.5237e-04
Loss = 3.0207e-02, PNorm = 157.8780, GNorm = 0.3681, lr_0 = 5.5199e-04
Loss = 3.1855e-02, PNorm = 157.9114, GNorm = 0.6218, lr_0 = 5.5162e-04
Loss = 3.1498e-02, PNorm = 157.9456, GNorm = 0.3286, lr_0 = 5.5124e-04
Loss = 3.6121e-02, PNorm = 157.9805, GNorm = 0.3580, lr_0 = 5.5086e-04
Loss = 2.5512e-02, PNorm = 158.0129, GNorm = 0.2176, lr_0 = 5.5048e-04
Loss = 3.5157e-02, PNorm = 158.0452, GNorm = 0.5103, lr_0 = 5.5011e-04
Loss = 3.0509e-02, PNorm = 158.0766, GNorm = 0.5616, lr_0 = 5.4973e-04
Loss = 2.9222e-02, PNorm = 158.1136, GNorm = 0.2383, lr_0 = 5.4935e-04
Loss = 3.2390e-02, PNorm = 158.1530, GNorm = 0.3012, lr_0 = 5.4898e-04
Loss = 3.4404e-02, PNorm = 158.1890, GNorm = 0.2256, lr_0 = 5.4860e-04
Loss = 2.3822e-02, PNorm = 158.2181, GNorm = 0.2237, lr_0 = 5.4822e-04
Loss = 3.3989e-02, PNorm = 158.2516, GNorm = 0.3510, lr_0 = 5.4785e-04
Loss = 3.4088e-02, PNorm = 158.2887, GNorm = 0.2546, lr_0 = 5.4747e-04
Loss = 2.8144e-02, PNorm = 158.3235, GNorm = 0.4777, lr_0 = 5.4710e-04
Loss = 3.5963e-02, PNorm = 158.3539, GNorm = 0.3329, lr_0 = 5.4672e-04
Loss = 3.1600e-02, PNorm = 158.3912, GNorm = 0.8881, lr_0 = 5.4635e-04
Loss = 3.6619e-02, PNorm = 158.4283, GNorm = 0.2787, lr_0 = 5.4597e-04
Loss = 3.2334e-02, PNorm = 158.4618, GNorm = 0.9212, lr_0 = 5.4560e-04
Loss = 2.9622e-02, PNorm = 158.4928, GNorm = 0.5537, lr_0 = 5.4523e-04
Loss = 2.8411e-02, PNorm = 158.5271, GNorm = 0.6348, lr_0 = 5.4485e-04
Loss = 2.9399e-02, PNorm = 158.5590, GNorm = 0.2722, lr_0 = 5.4448e-04
Loss = 4.2771e-02, PNorm = 158.6001, GNorm = 1.1186, lr_0 = 5.4411e-04
Loss = 3.1052e-02, PNorm = 158.6423, GNorm = 0.3660, lr_0 = 5.4373e-04
Loss = 3.4806e-02, PNorm = 158.6901, GNorm = 0.3898, lr_0 = 5.4336e-04
Loss = 3.2577e-02, PNorm = 158.7271, GNorm = 0.2337, lr_0 = 5.4299e-04
Loss = 3.9041e-02, PNorm = 158.7645, GNorm = 0.6384, lr_0 = 5.4262e-04
Loss = 3.3058e-02, PNorm = 158.8000, GNorm = 0.3797, lr_0 = 5.4225e-04
Loss = 3.5442e-02, PNorm = 158.8344, GNorm = 0.6001, lr_0 = 5.4187e-04
Loss = 3.7510e-02, PNorm = 158.8716, GNorm = 0.4266, lr_0 = 5.4150e-04
Loss = 3.0925e-02, PNorm = 158.9067, GNorm = 0.3902, lr_0 = 5.4113e-04
Loss = 2.6322e-02, PNorm = 158.9428, GNorm = 0.4439, lr_0 = 5.4076e-04
Loss = 2.7937e-02, PNorm = 158.9776, GNorm = 0.4551, lr_0 = 5.4039e-04
Loss = 3.8183e-02, PNorm = 159.0153, GNorm = 0.2939, lr_0 = 5.4002e-04
Loss = 3.9775e-02, PNorm = 159.0527, GNorm = 0.3896, lr_0 = 5.3965e-04
Loss = 2.8780e-02, PNorm = 159.0891, GNorm = 0.3408, lr_0 = 5.3928e-04
Loss = 3.4055e-02, PNorm = 159.1286, GNorm = 0.3698, lr_0 = 5.3891e-04
Loss = 4.8060e-02, PNorm = 159.1731, GNorm = 0.4561, lr_0 = 5.3854e-04
Loss = 3.8995e-02, PNorm = 159.2086, GNorm = 0.4231, lr_0 = 5.3817e-04
Loss = 3.8086e-02, PNorm = 159.2562, GNorm = 0.5231, lr_0 = 5.3781e-04
Loss = 3.2985e-02, PNorm = 159.2979, GNorm = 0.2354, lr_0 = 5.3744e-04
Loss = 3.5145e-02, PNorm = 159.3417, GNorm = 0.2645, lr_0 = 5.3707e-04
Loss = 3.0841e-02, PNorm = 159.3842, GNorm = 0.3702, lr_0 = 5.3670e-04
Loss = 2.8539e-02, PNorm = 159.4253, GNorm = 0.5749, lr_0 = 5.3633e-04
Loss = 3.7154e-02, PNorm = 159.4654, GNorm = 0.6406, lr_0 = 5.3597e-04
Loss = 3.1162e-02, PNorm = 159.5063, GNorm = 0.2425, lr_0 = 5.3560e-04
Loss = 4.1475e-02, PNorm = 159.5444, GNorm = 0.9947, lr_0 = 5.3523e-04
Loss = 3.6693e-02, PNorm = 159.5834, GNorm = 1.2730, lr_0 = 5.3486e-04
Loss = 4.7987e-02, PNorm = 159.6252, GNorm = 0.2978, lr_0 = 5.3450e-04
Loss = 3.3700e-02, PNorm = 159.6670, GNorm = 0.2945, lr_0 = 5.3413e-04
Loss = 3.1160e-02, PNorm = 159.7027, GNorm = 0.6075, lr_0 = 5.3377e-04
Loss = 3.0768e-02, PNorm = 159.7408, GNorm = 0.4378, lr_0 = 5.3340e-04
Loss = 2.9143e-02, PNorm = 159.7779, GNorm = 0.4481, lr_0 = 5.3304e-04
Loss = 2.6369e-02, PNorm = 159.8130, GNorm = 0.2606, lr_0 = 5.3267e-04
Loss = 3.0510e-02, PNorm = 159.8428, GNorm = 0.3462, lr_0 = 5.3231e-04
Loss = 3.1123e-02, PNorm = 159.8797, GNorm = 0.2959, lr_0 = 5.3194e-04
Loss = 2.7517e-02, PNorm = 159.9199, GNorm = 0.3700, lr_0 = 5.3158e-04
Loss = 2.9114e-02, PNorm = 159.9597, GNorm = 0.4064, lr_0 = 5.3121e-04
Loss = 2.9229e-02, PNorm = 159.9970, GNorm = 0.4034, lr_0 = 5.3085e-04
Loss = 2.8843e-02, PNorm = 160.0390, GNorm = 0.3277, lr_0 = 5.3048e-04
Loss = 3.4343e-02, PNorm = 160.0817, GNorm = 0.3863, lr_0 = 5.3012e-04
Loss = 3.6608e-02, PNorm = 160.1261, GNorm = 0.5287, lr_0 = 5.2976e-04
Loss = 3.0788e-02, PNorm = 160.1637, GNorm = 0.2793, lr_0 = 5.2939e-04
Loss = 2.7931e-02, PNorm = 160.2021, GNorm = 0.3752, lr_0 = 5.2903e-04
Loss = 4.4958e-02, PNorm = 160.2413, GNorm = 0.2654, lr_0 = 5.2867e-04
Loss = 3.8593e-02, PNorm = 160.2855, GNorm = 0.4657, lr_0 = 5.2831e-04
Loss = 2.9743e-02, PNorm = 160.3264, GNorm = 0.4346, lr_0 = 5.2795e-04
Loss = 3.4671e-02, PNorm = 160.3707, GNorm = 0.4032, lr_0 = 5.2758e-04
Loss = 2.9572e-02, PNorm = 160.4084, GNorm = 0.4431, lr_0 = 5.2722e-04
Loss = 2.8679e-02, PNorm = 160.4484, GNorm = 0.4871, lr_0 = 5.2686e-04
Loss = 3.0925e-02, PNorm = 160.4845, GNorm = 0.2097, lr_0 = 5.2650e-04
Loss = 3.5716e-02, PNorm = 160.5291, GNorm = 0.5241, lr_0 = 5.2614e-04
Loss = 3.8315e-02, PNorm = 160.5706, GNorm = 0.3951, lr_0 = 5.2578e-04
Loss = 2.7748e-02, PNorm = 160.6179, GNorm = 0.2013, lr_0 = 5.2542e-04
Loss = 3.9476e-02, PNorm = 160.6636, GNorm = 0.4627, lr_0 = 5.2506e-04
Loss = 3.9766e-02, PNorm = 160.7078, GNorm = 0.3397, lr_0 = 5.2470e-04
Loss = 3.0177e-02, PNorm = 160.7518, GNorm = 0.4773, lr_0 = 5.2434e-04
Loss = 3.9388e-02, PNorm = 160.7952, GNorm = 0.5152, lr_0 = 5.2398e-04
Loss = 4.6082e-02, PNorm = 160.8388, GNorm = 0.7335, lr_0 = 5.2362e-04
Loss = 3.1285e-02, PNorm = 160.8759, GNorm = 0.4990, lr_0 = 5.2326e-04
Loss = 3.3883e-02, PNorm = 160.9203, GNorm = 0.4320, lr_0 = 5.2290e-04
Loss = 3.9877e-02, PNorm = 160.9598, GNorm = 0.4218, lr_0 = 5.2255e-04
Loss = 2.8060e-02, PNorm = 161.0071, GNorm = 0.2502, lr_0 = 5.2219e-04
Loss = 2.7241e-02, PNorm = 161.0518, GNorm = 0.6278, lr_0 = 5.2183e-04
Loss = 3.1839e-02, PNorm = 161.0873, GNorm = 0.6002, lr_0 = 5.2147e-04
Loss = 3.9397e-02, PNorm = 161.1322, GNorm = 0.6355, lr_0 = 5.2112e-04
Loss = 3.5819e-02, PNorm = 161.1761, GNorm = 0.4994, lr_0 = 5.2076e-04
Loss = 3.2537e-02, PNorm = 161.2174, GNorm = 0.5791, lr_0 = 5.2040e-04
Loss = 3.5291e-02, PNorm = 161.2620, GNorm = 0.2770, lr_0 = 5.2005e-04
Loss = 3.3270e-02, PNorm = 161.3067, GNorm = 0.2977, lr_0 = 5.1969e-04
Loss = 3.2831e-02, PNorm = 161.3484, GNorm = 0.3978, lr_0 = 5.1933e-04
Loss = 3.4071e-02, PNorm = 161.3916, GNorm = 0.2267, lr_0 = 5.1898e-04
Loss = 3.6546e-02, PNorm = 161.4349, GNorm = 0.7413, lr_0 = 5.1862e-04
Loss = 3.1502e-02, PNorm = 161.4772, GNorm = 0.4605, lr_0 = 5.1827e-04
Loss = 3.6396e-02, PNorm = 161.5247, GNorm = 0.6730, lr_0 = 5.1791e-04
Validation mae = 0.122872
Epoch 10
Loss = 2.9522e-02, PNorm = 161.5587, GNorm = 0.4536, lr_0 = 5.1756e-04
Loss = 3.1701e-02, PNorm = 161.5931, GNorm = 0.4866, lr_0 = 5.1720e-04
Loss = 3.3634e-02, PNorm = 161.6217, GNorm = 0.1901, lr_0 = 5.1685e-04
Loss = 3.1380e-02, PNorm = 161.6548, GNorm = 0.4542, lr_0 = 5.1649e-04
Loss = 3.0281e-02, PNorm = 161.6886, GNorm = 0.3624, lr_0 = 5.1614e-04
Loss = 2.6717e-02, PNorm = 161.7203, GNorm = 0.3480, lr_0 = 5.1579e-04
Loss = 2.8828e-02, PNorm = 161.7481, GNorm = 0.4549, lr_0 = 5.1543e-04
Loss = 2.3673e-02, PNorm = 161.7760, GNorm = 0.3188, lr_0 = 5.1508e-04
Loss = 2.9209e-02, PNorm = 161.8050, GNorm = 0.2112, lr_0 = 5.1473e-04
Loss = 2.8469e-02, PNorm = 161.8366, GNorm = 0.1876, lr_0 = 5.1437e-04
Loss = 3.0890e-02, PNorm = 161.8706, GNorm = 0.3052, lr_0 = 5.1402e-04
Loss = 2.4431e-02, PNorm = 161.8997, GNorm = 0.6831, lr_0 = 5.1367e-04
Loss = 2.6811e-02, PNorm = 161.9287, GNorm = 0.3953, lr_0 = 5.1332e-04
Loss = 2.5060e-02, PNorm = 161.9544, GNorm = 0.3953, lr_0 = 5.1297e-04
Loss = 2.8669e-02, PNorm = 161.9876, GNorm = 0.3483, lr_0 = 5.1262e-04
Loss = 2.7512e-02, PNorm = 162.0207, GNorm = 0.2810, lr_0 = 5.1226e-04
Loss = 2.9476e-02, PNorm = 162.0532, GNorm = 0.4566, lr_0 = 5.1191e-04
Loss = 2.1135e-02, PNorm = 162.0769, GNorm = 0.4040, lr_0 = 5.1156e-04
Loss = 3.1887e-02, PNorm = 162.1027, GNorm = 0.2215, lr_0 = 5.1121e-04
Loss = 2.8618e-02, PNorm = 162.1294, GNorm = 0.4376, lr_0 = 5.1086e-04
Loss = 2.1751e-02, PNorm = 162.1601, GNorm = 0.4430, lr_0 = 5.1051e-04
Loss = 2.5009e-02, PNorm = 162.1873, GNorm = 0.3708, lr_0 = 5.1016e-04
Loss = 2.8497e-02, PNorm = 162.2204, GNorm = 0.3318, lr_0 = 5.0981e-04
Loss = 2.3019e-02, PNorm = 162.2488, GNorm = 0.3730, lr_0 = 5.0946e-04
Loss = 2.7525e-02, PNorm = 162.2770, GNorm = 0.1931, lr_0 = 5.0911e-04
Loss = 3.2155e-02, PNorm = 162.3064, GNorm = 0.3303, lr_0 = 5.0877e-04
Loss = 2.3769e-02, PNorm = 162.3311, GNorm = 0.2460, lr_0 = 5.0842e-04
Loss = 2.7720e-02, PNorm = 162.3596, GNorm = 0.2991, lr_0 = 5.0807e-04
Loss = 3.3821e-02, PNorm = 162.3908, GNorm = 0.2637, lr_0 = 5.0772e-04
Loss = 2.9812e-02, PNorm = 162.4203, GNorm = 0.2449, lr_0 = 5.0737e-04
Loss = 2.9712e-02, PNorm = 162.4441, GNorm = 1.1606, lr_0 = 5.0703e-04
Loss = 2.1688e-02, PNorm = 162.4692, GNorm = 0.2666, lr_0 = 5.0668e-04
Loss = 4.3972e-02, PNorm = 162.5022, GNorm = 0.4261, lr_0 = 5.0633e-04
Loss = 2.2175e-02, PNorm = 162.5389, GNorm = 0.2959, lr_0 = 5.0598e-04
Loss = 2.9941e-02, PNorm = 162.5694, GNorm = 0.4466, lr_0 = 5.0564e-04
Loss = 2.9496e-02, PNorm = 162.5989, GNorm = 0.5326, lr_0 = 5.0529e-04
Loss = 2.5933e-02, PNorm = 162.6273, GNorm = 0.3874, lr_0 = 5.0494e-04
Loss = 3.0784e-02, PNorm = 162.6555, GNorm = 0.4202, lr_0 = 5.0460e-04
Loss = 2.4130e-02, PNorm = 162.6858, GNorm = 0.2529, lr_0 = 5.0425e-04
Loss = 1.9912e-02, PNorm = 162.7171, GNorm = 0.2936, lr_0 = 5.0391e-04
Loss = 3.5142e-02, PNorm = 162.7455, GNorm = 0.6791, lr_0 = 5.0356e-04
Loss = 3.6502e-02, PNorm = 162.7769, GNorm = 0.3823, lr_0 = 5.0322e-04
Loss = 2.9742e-02, PNorm = 162.8086, GNorm = 0.4586, lr_0 = 5.0287e-04
Loss = 3.3428e-02, PNorm = 162.8351, GNorm = 0.3028, lr_0 = 5.0253e-04
Loss = 2.2245e-02, PNorm = 162.8642, GNorm = 0.6408, lr_0 = 5.0218e-04
Loss = 2.4890e-02, PNorm = 162.8918, GNorm = 0.4494, lr_0 = 5.0184e-04
Loss = 2.0669e-02, PNorm = 162.9190, GNorm = 0.2990, lr_0 = 5.0150e-04
Loss = 2.5120e-02, PNorm = 162.9428, GNorm = 0.3631, lr_0 = 5.0115e-04
Loss = 2.9731e-02, PNorm = 162.9697, GNorm = 0.5290, lr_0 = 5.0081e-04
Loss = 2.8164e-02, PNorm = 163.0037, GNorm = 0.2677, lr_0 = 5.0047e-04
Loss = 2.4553e-02, PNorm = 163.0369, GNorm = 0.4109, lr_0 = 5.0012e-04
Loss = 3.0474e-02, PNorm = 163.0777, GNorm = 0.2329, lr_0 = 4.9978e-04
Loss = 2.3399e-02, PNorm = 163.1128, GNorm = 0.5615, lr_0 = 4.9944e-04
Loss = 2.6146e-02, PNorm = 163.1453, GNorm = 0.5113, lr_0 = 4.9910e-04
Loss = 2.7271e-02, PNorm = 163.1783, GNorm = 0.4878, lr_0 = 4.9875e-04
Loss = 2.7095e-02, PNorm = 163.2105, GNorm = 0.2581, lr_0 = 4.9841e-04
Loss = 2.5089e-02, PNorm = 163.2427, GNorm = 0.2887, lr_0 = 4.9807e-04
Loss = 3.4759e-02, PNorm = 163.2704, GNorm = 0.2445, lr_0 = 4.9773e-04
Loss = 3.3785e-02, PNorm = 163.3018, GNorm = 0.3809, lr_0 = 4.9739e-04
Loss = 2.9719e-02, PNorm = 163.3352, GNorm = 0.4586, lr_0 = 4.9705e-04
Loss = 3.7639e-02, PNorm = 163.3737, GNorm = 1.0088, lr_0 = 4.9671e-04
Loss = 2.4009e-02, PNorm = 163.4103, GNorm = 0.4900, lr_0 = 4.9637e-04
Loss = 3.1565e-02, PNorm = 163.4447, GNorm = 0.4578, lr_0 = 4.9603e-04
Loss = 2.6035e-02, PNorm = 163.4792, GNorm = 0.2779, lr_0 = 4.9569e-04
Loss = 2.4629e-02, PNorm = 163.5147, GNorm = 0.3353, lr_0 = 4.9535e-04
Loss = 2.3708e-02, PNorm = 163.5494, GNorm = 0.3936, lr_0 = 4.9501e-04
Loss = 2.4824e-02, PNorm = 163.5769, GNorm = 0.4612, lr_0 = 4.9467e-04
Loss = 2.5508e-02, PNorm = 163.6052, GNorm = 0.3921, lr_0 = 4.9433e-04
Loss = 2.7030e-02, PNorm = 163.6344, GNorm = 0.2002, lr_0 = 4.9399e-04
Loss = 3.2000e-02, PNorm = 163.6617, GNorm = 0.4158, lr_0 = 4.9365e-04
Loss = 2.0961e-02, PNorm = 163.6939, GNorm = 0.3214, lr_0 = 4.9332e-04
Loss = 2.2490e-02, PNorm = 163.7258, GNorm = 0.4124, lr_0 = 4.9298e-04
Loss = 2.1560e-02, PNorm = 163.7582, GNorm = 0.2816, lr_0 = 4.9264e-04
Loss = 2.9647e-02, PNorm = 163.7927, GNorm = 0.3709, lr_0 = 4.9230e-04
Loss = 2.7528e-02, PNorm = 163.8268, GNorm = 0.3723, lr_0 = 4.9197e-04
Loss = 3.3150e-02, PNorm = 163.8626, GNorm = 0.4666, lr_0 = 4.9163e-04
Loss = 2.6244e-02, PNorm = 163.8992, GNorm = 0.2140, lr_0 = 4.9129e-04
Loss = 2.1488e-02, PNorm = 163.9349, GNorm = 0.1979, lr_0 = 4.9095e-04
Loss = 2.9426e-02, PNorm = 163.9670, GNorm = 0.4072, lr_0 = 4.9062e-04
Loss = 3.0135e-02, PNorm = 163.9991, GNorm = 0.2363, lr_0 = 4.9028e-04
Loss = 2.9977e-02, PNorm = 164.0288, GNorm = 0.4466, lr_0 = 4.8995e-04
Loss = 3.2835e-02, PNorm = 164.0660, GNorm = 0.4917, lr_0 = 4.8961e-04
Loss = 2.4631e-02, PNorm = 164.1032, GNorm = 0.3280, lr_0 = 4.8928e-04
Loss = 3.7998e-02, PNorm = 164.1368, GNorm = 0.3287, lr_0 = 4.8894e-04
Loss = 2.4301e-02, PNorm = 164.1740, GNorm = 0.4388, lr_0 = 4.8861e-04
Loss = 2.5283e-02, PNorm = 164.2100, GNorm = 0.3597, lr_0 = 4.8827e-04
Loss = 2.9062e-02, PNorm = 164.2421, GNorm = 0.6478, lr_0 = 4.8794e-04
Loss = 2.6594e-02, PNorm = 164.2752, GNorm = 0.5784, lr_0 = 4.8760e-04
Loss = 2.9432e-02, PNorm = 164.3134, GNorm = 0.3244, lr_0 = 4.8727e-04
Loss = 3.2563e-02, PNorm = 164.3478, GNorm = 0.4004, lr_0 = 4.8693e-04
Loss = 2.8246e-02, PNorm = 164.3836, GNorm = 0.3900, lr_0 = 4.8660e-04
Loss = 2.3068e-02, PNorm = 164.4204, GNorm = 0.4969, lr_0 = 4.8627e-04
Loss = 3.2522e-02, PNorm = 164.4602, GNorm = 0.3465, lr_0 = 4.8593e-04
Loss = 2.5863e-02, PNorm = 164.4955, GNorm = 0.3569, lr_0 = 4.8560e-04
Loss = 2.4088e-02, PNorm = 164.5332, GNorm = 0.3475, lr_0 = 4.8527e-04
Loss = 2.7324e-02, PNorm = 164.5657, GNorm = 0.3519, lr_0 = 4.8494e-04
Loss = 2.8833e-02, PNorm = 164.5963, GNorm = 0.2031, lr_0 = 4.8460e-04
Loss = 2.6924e-02, PNorm = 164.6275, GNorm = 0.4142, lr_0 = 4.8427e-04
Loss = 3.7166e-02, PNorm = 164.6584, GNorm = 0.3792, lr_0 = 4.8394e-04
Loss = 3.2008e-02, PNorm = 164.6907, GNorm = 0.9622, lr_0 = 4.8361e-04
Loss = 2.4314e-02, PNorm = 164.7199, GNorm = 0.3156, lr_0 = 4.8328e-04
Loss = 3.1231e-02, PNorm = 164.7533, GNorm = 0.2740, lr_0 = 4.8295e-04
Loss = 2.5492e-02, PNorm = 164.7855, GNorm = 0.2891, lr_0 = 4.8262e-04
Loss = 2.6061e-02, PNorm = 164.8194, GNorm = 0.3515, lr_0 = 4.8228e-04
Loss = 2.2520e-02, PNorm = 164.8537, GNorm = 0.5237, lr_0 = 4.8195e-04
Loss = 2.4189e-02, PNorm = 164.8864, GNorm = 0.3388, lr_0 = 4.8162e-04
Loss = 2.7509e-02, PNorm = 164.9213, GNorm = 0.5503, lr_0 = 4.8129e-04
Loss = 2.5679e-02, PNorm = 164.9553, GNorm = 0.4770, lr_0 = 4.8096e-04
Loss = 2.4546e-02, PNorm = 164.9864, GNorm = 0.2030, lr_0 = 4.8064e-04
Loss = 2.4404e-02, PNorm = 165.0178, GNorm = 0.2585, lr_0 = 4.8031e-04
Loss = 3.0443e-02, PNorm = 165.0508, GNorm = 0.4110, lr_0 = 4.7998e-04
Loss = 2.4505e-02, PNorm = 165.0853, GNorm = 0.2382, lr_0 = 4.7965e-04
Loss = 2.8776e-02, PNorm = 165.1219, GNorm = 0.2710, lr_0 = 4.7932e-04
Loss = 2.7010e-02, PNorm = 165.1546, GNorm = 0.3571, lr_0 = 4.7899e-04
Loss = 2.7807e-02, PNorm = 165.1844, GNorm = 0.3587, lr_0 = 4.7866e-04
Loss = 2.7732e-02, PNorm = 165.2180, GNorm = 0.6918, lr_0 = 4.7833e-04
Loss = 2.7769e-02, PNorm = 165.2554, GNorm = 0.3840, lr_0 = 4.7801e-04
Loss = 2.7402e-02, PNorm = 165.2950, GNorm = 1.0180, lr_0 = 4.7768e-04
Loss = 2.6599e-02, PNorm = 165.3320, GNorm = 0.4858, lr_0 = 4.7735e-04
Loss = 2.9616e-02, PNorm = 165.3679, GNorm = 0.4616, lr_0 = 4.7703e-04
Validation mae = 0.122314
Epoch 11
Loss = 2.0685e-02, PNorm = 165.3982, GNorm = 0.5180, lr_0 = 4.7670e-04
Loss = 2.5472e-02, PNorm = 165.4240, GNorm = 0.7248, lr_0 = 4.7637e-04
Loss = 2.1612e-02, PNorm = 165.4518, GNorm = 0.2806, lr_0 = 4.7605e-04
Loss = 2.4009e-02, PNorm = 165.4753, GNorm = 0.5581, lr_0 = 4.7572e-04
Loss = 2.2725e-02, PNorm = 165.5003, GNorm = 0.2774, lr_0 = 4.7539e-04
Loss = 2.4062e-02, PNorm = 165.5263, GNorm = 0.4251, lr_0 = 4.7507e-04
Loss = 2.2514e-02, PNorm = 165.5522, GNorm = 0.7936, lr_0 = 4.7474e-04
Loss = 2.5467e-02, PNorm = 165.5753, GNorm = 0.7628, lr_0 = 4.7442e-04
Loss = 2.2643e-02, PNorm = 165.5969, GNorm = 0.3535, lr_0 = 4.7409e-04
Loss = 2.4058e-02, PNorm = 165.6173, GNorm = 0.5449, lr_0 = 4.7377e-04
Loss = 2.3825e-02, PNorm = 165.6350, GNorm = 0.4808, lr_0 = 4.7344e-04
Loss = 2.3555e-02, PNorm = 165.6655, GNorm = 0.4327, lr_0 = 4.7312e-04
Loss = 2.3673e-02, PNorm = 165.6904, GNorm = 0.2252, lr_0 = 4.7279e-04
Loss = 2.3511e-02, PNorm = 165.7140, GNorm = 0.2950, lr_0 = 4.7247e-04
Loss = 2.1437e-02, PNorm = 165.7368, GNorm = 0.2575, lr_0 = 4.7215e-04
Loss = 2.4650e-02, PNorm = 165.7609, GNorm = 0.2291, lr_0 = 4.7182e-04
Loss = 2.7704e-02, PNorm = 165.7797, GNorm = 0.2694, lr_0 = 4.7150e-04
Loss = 2.2268e-02, PNorm = 165.8046, GNorm = 0.3185, lr_0 = 4.7118e-04
Loss = 2.0394e-02, PNorm = 165.8315, GNorm = 0.2097, lr_0 = 4.7085e-04
Loss = 1.9702e-02, PNorm = 165.8530, GNorm = 0.2926, lr_0 = 4.7053e-04
Loss = 2.1424e-02, PNorm = 165.8728, GNorm = 0.3651, lr_0 = 4.7021e-04
Loss = 2.0638e-02, PNorm = 165.8934, GNorm = 0.2402, lr_0 = 4.6989e-04
Loss = 2.3405e-02, PNorm = 165.9110, GNorm = 0.2348, lr_0 = 4.6957e-04
Loss = 2.0538e-02, PNorm = 165.9323, GNorm = 0.2056, lr_0 = 4.6924e-04
Loss = 1.7645e-02, PNorm = 165.9555, GNorm = 0.2921, lr_0 = 4.6892e-04
Loss = 1.9141e-02, PNorm = 165.9774, GNorm = 0.2128, lr_0 = 4.6860e-04
Loss = 2.3460e-02, PNorm = 165.9993, GNorm = 0.3305, lr_0 = 4.6828e-04
Loss = 2.5167e-02, PNorm = 166.0188, GNorm = 0.4362, lr_0 = 4.6796e-04
Loss = 1.6841e-02, PNorm = 166.0451, GNorm = 0.3432, lr_0 = 4.6764e-04
Loss = 2.1955e-02, PNorm = 166.0661, GNorm = 0.3093, lr_0 = 4.6732e-04
Loss = 2.2776e-02, PNorm = 166.0871, GNorm = 0.6085, lr_0 = 4.6700e-04
Loss = 1.8679e-02, PNorm = 166.1108, GNorm = 0.2756, lr_0 = 4.6668e-04
Loss = 2.2470e-02, PNorm = 166.1413, GNorm = 0.2577, lr_0 = 4.6636e-04
Loss = 2.7484e-02, PNorm = 166.1689, GNorm = 0.2785, lr_0 = 4.6604e-04
Loss = 1.7789e-02, PNorm = 166.1942, GNorm = 0.2900, lr_0 = 4.6572e-04
Loss = 2.2588e-02, PNorm = 166.2170, GNorm = 0.3027, lr_0 = 4.6540e-04
Loss = 2.0629e-02, PNorm = 166.2422, GNorm = 0.1700, lr_0 = 4.6508e-04
Loss = 2.6629e-02, PNorm = 166.2692, GNorm = 0.3950, lr_0 = 4.6476e-04
Loss = 2.8722e-02, PNorm = 166.2917, GNorm = 0.3607, lr_0 = 4.6445e-04
Loss = 2.0731e-02, PNorm = 166.3181, GNorm = 0.2669, lr_0 = 4.6413e-04
Loss = 2.3803e-02, PNorm = 166.3445, GNorm = 0.2250, lr_0 = 4.6381e-04
Loss = 1.7800e-02, PNorm = 166.3690, GNorm = 0.4044, lr_0 = 4.6349e-04
Loss = 2.5948e-02, PNorm = 166.3915, GNorm = 0.1867, lr_0 = 4.6317e-04
Loss = 2.1003e-02, PNorm = 166.4138, GNorm = 0.3598, lr_0 = 4.6286e-04
Loss = 2.5887e-02, PNorm = 166.4445, GNorm = 0.6028, lr_0 = 4.6254e-04
Loss = 2.4963e-02, PNorm = 166.4663, GNorm = 0.3675, lr_0 = 4.6222e-04
Loss = 2.6048e-02, PNorm = 166.4934, GNorm = 0.4450, lr_0 = 4.6191e-04
Loss = 2.1864e-02, PNorm = 166.5204, GNorm = 0.2422, lr_0 = 4.6159e-04
Loss = 2.2845e-02, PNorm = 166.5457, GNorm = 0.3160, lr_0 = 4.6127e-04
Loss = 2.2183e-02, PNorm = 166.5703, GNorm = 0.3226, lr_0 = 4.6096e-04
Loss = 2.1715e-02, PNorm = 166.5942, GNorm = 0.6319, lr_0 = 4.6064e-04
Loss = 2.5192e-02, PNorm = 166.6226, GNorm = 0.2491, lr_0 = 4.6033e-04
Loss = 2.8753e-02, PNorm = 166.6495, GNorm = 0.5824, lr_0 = 4.6001e-04
Loss = 2.0316e-02, PNorm = 166.6749, GNorm = 0.2301, lr_0 = 4.5970e-04
Loss = 2.1162e-02, PNorm = 166.7032, GNorm = 0.2542, lr_0 = 4.5938e-04
Loss = 2.1151e-02, PNorm = 166.7300, GNorm = 0.4947, lr_0 = 4.5907e-04
Loss = 2.6032e-02, PNorm = 166.7558, GNorm = 1.2456, lr_0 = 4.5875e-04
Loss = 2.1931e-02, PNorm = 166.7816, GNorm = 0.3113, lr_0 = 4.5844e-04
Loss = 2.2479e-02, PNorm = 166.8084, GNorm = 0.4409, lr_0 = 4.5812e-04
Loss = 1.8293e-02, PNorm = 166.8378, GNorm = 0.2094, lr_0 = 4.5781e-04
Loss = 1.9126e-02, PNorm = 166.8669, GNorm = 0.2935, lr_0 = 4.5750e-04
Loss = 1.9612e-02, PNorm = 166.8929, GNorm = 0.6730, lr_0 = 4.5718e-04
Loss = 2.2636e-02, PNorm = 166.9196, GNorm = 0.4729, lr_0 = 4.5687e-04
Loss = 1.7950e-02, PNorm = 166.9422, GNorm = 0.2542, lr_0 = 4.5656e-04
Loss = 2.5084e-02, PNorm = 166.9657, GNorm = 0.3493, lr_0 = 4.5624e-04
Loss = 1.7701e-02, PNorm = 166.9907, GNorm = 0.2091, lr_0 = 4.5593e-04
Loss = 2.5101e-02, PNorm = 167.0148, GNorm = 0.3878, lr_0 = 4.5562e-04
Loss = 2.0106e-02, PNorm = 167.0423, GNorm = 0.2517, lr_0 = 4.5531e-04
Loss = 1.9211e-02, PNorm = 167.0729, GNorm = 0.1429, lr_0 = 4.5499e-04
Loss = 1.9023e-02, PNorm = 167.1003, GNorm = 0.2882, lr_0 = 4.5468e-04
Loss = 2.2806e-02, PNorm = 167.1251, GNorm = 0.5473, lr_0 = 4.5437e-04
Loss = 1.9947e-02, PNorm = 167.1502, GNorm = 0.7935, lr_0 = 4.5406e-04
Loss = 2.2251e-02, PNorm = 167.1774, GNorm = 0.5372, lr_0 = 4.5375e-04
Loss = 3.0735e-02, PNorm = 167.2033, GNorm = 0.9359, lr_0 = 4.5344e-04
Loss = 2.0544e-02, PNorm = 167.2346, GNorm = 0.2624, lr_0 = 4.5313e-04
Loss = 1.7995e-02, PNorm = 167.2639, GNorm = 0.3668, lr_0 = 4.5282e-04
Loss = 1.7739e-02, PNorm = 167.2943, GNorm = 0.3007, lr_0 = 4.5251e-04
Loss = 2.6714e-02, PNorm = 167.3249, GNorm = 0.4025, lr_0 = 4.5220e-04
Loss = 2.7211e-02, PNorm = 167.3561, GNorm = 0.5576, lr_0 = 4.5189e-04
Loss = 2.2237e-02, PNorm = 167.3849, GNorm = 0.5849, lr_0 = 4.5158e-04
Loss = 3.5738e-02, PNorm = 167.4176, GNorm = 0.3512, lr_0 = 4.5127e-04
Loss = 2.1866e-02, PNorm = 167.4496, GNorm = 0.4205, lr_0 = 4.5096e-04
Loss = 2.0133e-02, PNorm = 167.4787, GNorm = 0.3259, lr_0 = 4.5065e-04
Loss = 2.5250e-02, PNorm = 167.5008, GNorm = 0.2783, lr_0 = 4.5034e-04
Loss = 1.9515e-02, PNorm = 167.5275, GNorm = 0.3461, lr_0 = 4.5003e-04
Loss = 1.8864e-02, PNorm = 167.5527, GNorm = 0.2193, lr_0 = 4.4972e-04
Loss = 2.5776e-02, PNorm = 167.5770, GNorm = 0.1897, lr_0 = 4.4942e-04
Loss = 1.8360e-02, PNorm = 167.6038, GNorm = 0.5554, lr_0 = 4.4911e-04
Loss = 2.5440e-02, PNorm = 167.6312, GNorm = 0.1855, lr_0 = 4.4880e-04
Loss = 1.9220e-02, PNorm = 167.6604, GNorm = 0.2269, lr_0 = 4.4849e-04
Loss = 2.2083e-02, PNorm = 167.6939, GNorm = 0.2623, lr_0 = 4.4819e-04
Loss = 2.3584e-02, PNorm = 167.7240, GNorm = 0.6837, lr_0 = 4.4788e-04
Loss = 2.1301e-02, PNorm = 167.7550, GNorm = 0.5246, lr_0 = 4.4757e-04
Loss = 2.4576e-02, PNorm = 167.7853, GNorm = 0.2993, lr_0 = 4.4727e-04
Loss = 2.9213e-02, PNorm = 167.8136, GNorm = 0.2883, lr_0 = 4.4696e-04
Loss = 2.0639e-02, PNorm = 167.8427, GNorm = 0.4367, lr_0 = 4.4665e-04
Loss = 4.7264e-02, PNorm = 167.8748, GNorm = 0.4740, lr_0 = 4.4635e-04
Loss = 1.9790e-02, PNorm = 167.9023, GNorm = 0.3113, lr_0 = 4.4604e-04
Loss = 1.9914e-02, PNorm = 167.9314, GNorm = 0.6006, lr_0 = 4.4574e-04
Loss = 2.1881e-02, PNorm = 167.9633, GNorm = 0.5245, lr_0 = 4.4543e-04
Loss = 2.4113e-02, PNorm = 167.9923, GNorm = 0.2595, lr_0 = 4.4513e-04
Loss = 3.3458e-02, PNorm = 168.0208, GNorm = 0.3089, lr_0 = 4.4482e-04
Loss = 2.0747e-02, PNorm = 168.0499, GNorm = 0.2200, lr_0 = 4.4452e-04
Loss = 2.3976e-02, PNorm = 168.0791, GNorm = 0.3626, lr_0 = 4.4421e-04
Loss = 3.7343e-02, PNorm = 168.1139, GNorm = 0.3816, lr_0 = 4.4391e-04
Loss = 2.0902e-02, PNorm = 168.1500, GNorm = 0.2774, lr_0 = 4.4360e-04
Loss = 2.4811e-02, PNorm = 168.1802, GNorm = 0.3508, lr_0 = 4.4330e-04
Loss = 2.4953e-02, PNorm = 168.2146, GNorm = 0.3711, lr_0 = 4.4299e-04
Loss = 2.5143e-02, PNorm = 168.2465, GNorm = 0.3609, lr_0 = 4.4269e-04
Loss = 2.4763e-02, PNorm = 168.2781, GNorm = 0.5340, lr_0 = 4.4239e-04
Loss = 1.8633e-02, PNorm = 168.3040, GNorm = 0.1905, lr_0 = 4.4209e-04
Loss = 2.3600e-02, PNorm = 168.3318, GNorm = 0.2365, lr_0 = 4.4178e-04
Loss = 2.0511e-02, PNorm = 168.3644, GNorm = 0.6603, lr_0 = 4.4148e-04
Loss = 2.6218e-02, PNorm = 168.3988, GNorm = 0.2787, lr_0 = 4.4118e-04
Loss = 2.9969e-02, PNorm = 168.4298, GNorm = 0.2393, lr_0 = 4.4088e-04
Loss = 3.2697e-02, PNorm = 168.4605, GNorm = 0.3430, lr_0 = 4.4057e-04
Loss = 2.2690e-02, PNorm = 168.4892, GNorm = 0.2187, lr_0 = 4.4027e-04
Loss = 2.4070e-02, PNorm = 168.5201, GNorm = 0.1852, lr_0 = 4.3997e-04
Loss = 1.7859e-02, PNorm = 168.5500, GNorm = 0.1880, lr_0 = 4.3967e-04
Loss = 2.5245e-02, PNorm = 168.5764, GNorm = 0.3246, lr_0 = 4.3937e-04
Validation mae = 0.122477
Epoch 12
Loss = 2.6319e-02, PNorm = 168.5976, GNorm = 0.3750, lr_0 = 4.3907e-04
Loss = 2.3384e-02, PNorm = 168.6249, GNorm = 0.4300, lr_0 = 4.3877e-04
Loss = 2.2312e-02, PNorm = 168.6456, GNorm = 0.2739, lr_0 = 4.3846e-04
Loss = 2.3128e-02, PNorm = 168.6687, GNorm = 0.4558, lr_0 = 4.3816e-04
Loss = 3.1556e-02, PNorm = 168.6914, GNorm = 0.4438, lr_0 = 4.3786e-04
Loss = 2.0492e-02, PNorm = 168.7193, GNorm = 0.1764, lr_0 = 4.3756e-04
Loss = 2.0269e-02, PNorm = 168.7375, GNorm = 0.7103, lr_0 = 4.3726e-04
Loss = 1.9762e-02, PNorm = 168.7564, GNorm = 0.6734, lr_0 = 4.3696e-04
Loss = 1.4246e-02, PNorm = 168.7792, GNorm = 0.3615, lr_0 = 4.3667e-04
Loss = 1.8804e-02, PNorm = 168.7996, GNorm = 0.2250, lr_0 = 4.3637e-04
Loss = 1.8802e-02, PNorm = 168.8207, GNorm = 0.1836, lr_0 = 4.3607e-04
Loss = 1.5613e-02, PNorm = 168.8417, GNorm = 0.3000, lr_0 = 4.3577e-04
Loss = 2.0326e-02, PNorm = 168.8611, GNorm = 0.3073, lr_0 = 4.3547e-04
Loss = 2.2829e-02, PNorm = 168.8816, GNorm = 0.3351, lr_0 = 4.3517e-04
Loss = 1.6968e-02, PNorm = 168.9011, GNorm = 0.2860, lr_0 = 4.3487e-04
Loss = 1.9423e-02, PNorm = 168.9201, GNorm = 0.1775, lr_0 = 4.3458e-04
Loss = 1.9253e-02, PNorm = 168.9408, GNorm = 0.3377, lr_0 = 4.3428e-04
Loss = 1.6607e-02, PNorm = 168.9601, GNorm = 0.5696, lr_0 = 4.3398e-04
Loss = 1.5319e-02, PNorm = 168.9804, GNorm = 0.1907, lr_0 = 4.3368e-04
Loss = 2.0061e-02, PNorm = 169.0019, GNorm = 0.4510, lr_0 = 4.3339e-04
Loss = 1.7918e-02, PNorm = 169.0223, GNorm = 0.2817, lr_0 = 4.3309e-04
Loss = 1.8205e-02, PNorm = 169.0403, GNorm = 0.3447, lr_0 = 4.3279e-04
Loss = 1.9735e-02, PNorm = 169.0596, GNorm = 0.3919, lr_0 = 4.3250e-04
Loss = 2.0821e-02, PNorm = 169.0800, GNorm = 0.4621, lr_0 = 4.3220e-04
Loss = 2.7954e-02, PNorm = 169.1011, GNorm = 0.2569, lr_0 = 4.3190e-04
Loss = 1.7292e-02, PNorm = 169.1257, GNorm = 0.1852, lr_0 = 4.3161e-04
Loss = 2.0259e-02, PNorm = 169.1475, GNorm = 0.2379, lr_0 = 4.3131e-04
Loss = 1.9493e-02, PNorm = 169.1726, GNorm = 0.5075, lr_0 = 4.3102e-04
Loss = 2.6196e-02, PNorm = 169.1955, GNorm = 0.2359, lr_0 = 4.3072e-04
Loss = 1.9182e-02, PNorm = 169.2147, GNorm = 0.2228, lr_0 = 4.3043e-04
Loss = 2.1774e-02, PNorm = 169.2359, GNorm = 0.3103, lr_0 = 4.3013e-04
Loss = 1.7954e-02, PNorm = 169.2588, GNorm = 0.4362, lr_0 = 4.2984e-04
Loss = 1.4757e-02, PNorm = 169.2770, GNorm = 0.2564, lr_0 = 4.2954e-04
Loss = 1.7114e-02, PNorm = 169.2939, GNorm = 0.3186, lr_0 = 4.2925e-04
Loss = 1.9155e-02, PNorm = 169.3115, GNorm = 0.3294, lr_0 = 4.2895e-04
Loss = 2.1821e-02, PNorm = 169.3333, GNorm = 0.3019, lr_0 = 4.2866e-04
Loss = 1.7720e-02, PNorm = 169.3572, GNorm = 0.6484, lr_0 = 4.2837e-04
Loss = 2.3540e-02, PNorm = 169.3799, GNorm = 0.3653, lr_0 = 4.2807e-04
Loss = 1.6949e-02, PNorm = 169.3986, GNorm = 0.1972, lr_0 = 4.2778e-04
Loss = 2.0831e-02, PNorm = 169.4159, GNorm = 0.1957, lr_0 = 4.2749e-04
Loss = 2.4028e-02, PNorm = 169.4392, GNorm = 0.3253, lr_0 = 4.2719e-04
Loss = 1.9058e-02, PNorm = 169.4587, GNorm = 0.2435, lr_0 = 4.2690e-04
Loss = 1.7280e-02, PNorm = 169.4788, GNorm = 0.2966, lr_0 = 4.2661e-04
Loss = 1.4281e-02, PNorm = 169.4991, GNorm = 0.2436, lr_0 = 4.2632e-04
Loss = 1.5962e-02, PNorm = 169.5166, GNorm = 0.7110, lr_0 = 4.2602e-04
Loss = 1.6078e-02, PNorm = 169.5383, GNorm = 0.2946, lr_0 = 4.2573e-04
Loss = 2.3075e-02, PNorm = 169.5576, GNorm = 0.2399, lr_0 = 4.2544e-04
Loss = 2.1089e-02, PNorm = 169.5778, GNorm = 0.3265, lr_0 = 4.2515e-04
Loss = 1.8937e-02, PNorm = 169.6023, GNorm = 0.2147, lr_0 = 4.2486e-04
Loss = 1.6245e-02, PNorm = 169.6289, GNorm = 0.1626, lr_0 = 4.2457e-04
Loss = 1.8757e-02, PNorm = 169.6497, GNorm = 0.3801, lr_0 = 4.2428e-04
Loss = 2.0425e-02, PNorm = 169.6729, GNorm = 0.1449, lr_0 = 4.2399e-04
Loss = 2.7610e-02, PNorm = 169.6983, GNorm = 0.1982, lr_0 = 4.2370e-04
Loss = 1.9320e-02, PNorm = 169.7222, GNorm = 0.2498, lr_0 = 4.2340e-04
Loss = 2.9582e-02, PNorm = 169.7470, GNorm = 0.2487, lr_0 = 4.2311e-04
Loss = 1.6122e-02, PNorm = 169.7699, GNorm = 0.2170, lr_0 = 4.2283e-04
Loss = 2.4224e-02, PNorm = 169.7917, GNorm = 0.3289, lr_0 = 4.2254e-04
Loss = 1.6291e-02, PNorm = 169.8123, GNorm = 0.3307, lr_0 = 4.2225e-04
Loss = 1.9647e-02, PNorm = 169.8378, GNorm = 0.3387, lr_0 = 4.2196e-04
Loss = 1.6265e-02, PNorm = 169.8582, GNorm = 0.2570, lr_0 = 4.2167e-04
Loss = 1.8437e-02, PNorm = 169.8827, GNorm = 0.3906, lr_0 = 4.2138e-04
Loss = 2.4125e-02, PNorm = 169.9112, GNorm = 0.5830, lr_0 = 4.2109e-04
Loss = 1.6220e-02, PNorm = 169.9395, GNorm = 0.2846, lr_0 = 4.2080e-04
Loss = 1.7736e-02, PNorm = 169.9658, GNorm = 0.3059, lr_0 = 4.2051e-04
Loss = 1.6408e-02, PNorm = 169.9874, GNorm = 0.1434, lr_0 = 4.2023e-04
Loss = 1.7568e-02, PNorm = 170.0073, GNorm = 0.1933, lr_0 = 4.1994e-04
Loss = 2.6258e-02, PNorm = 170.0311, GNorm = 0.3797, lr_0 = 4.1965e-04
Loss = 1.7352e-02, PNorm = 170.0584, GNorm = 0.2761, lr_0 = 4.1936e-04
Loss = 1.7437e-02, PNorm = 170.0807, GNorm = 0.2049, lr_0 = 4.1907e-04
Loss = 2.6752e-02, PNorm = 170.1018, GNorm = 0.3658, lr_0 = 4.1879e-04
Loss = 1.7096e-02, PNorm = 170.1227, GNorm = 0.5293, lr_0 = 4.1850e-04
Loss = 2.0372e-02, PNorm = 170.1480, GNorm = 0.5018, lr_0 = 4.1821e-04
Loss = 2.2134e-02, PNorm = 170.1698, GNorm = 0.2806, lr_0 = 4.1793e-04
Loss = 1.9455e-02, PNorm = 170.1940, GNorm = 0.4072, lr_0 = 4.1764e-04
Loss = 1.7361e-02, PNorm = 170.2188, GNorm = 0.3805, lr_0 = 4.1736e-04
Loss = 1.9483e-02, PNorm = 170.2407, GNorm = 0.4224, lr_0 = 4.1707e-04
Loss = 1.5810e-02, PNorm = 170.2637, GNorm = 0.4260, lr_0 = 4.1678e-04
Loss = 1.9643e-02, PNorm = 170.2917, GNorm = 0.2791, lr_0 = 4.1650e-04
Loss = 2.5092e-02, PNorm = 170.3137, GNorm = 0.2044, lr_0 = 4.1621e-04
Loss = 1.7901e-02, PNorm = 170.3371, GNorm = 0.3470, lr_0 = 4.1593e-04
Loss = 1.7634e-02, PNorm = 170.3604, GNorm = 0.2001, lr_0 = 4.1564e-04
Loss = 1.4403e-02, PNorm = 170.3815, GNorm = 0.1423, lr_0 = 4.1536e-04
Loss = 1.4471e-02, PNorm = 170.4027, GNorm = 0.3061, lr_0 = 4.1507e-04
Loss = 1.6064e-02, PNorm = 170.4222, GNorm = 0.3793, lr_0 = 4.1479e-04
Loss = 2.2489e-02, PNorm = 170.4438, GNorm = 0.3796, lr_0 = 4.1450e-04
Loss = 1.7506e-02, PNorm = 170.4634, GNorm = 0.6349, lr_0 = 4.1422e-04
Loss = 1.5664e-02, PNorm = 170.4868, GNorm = 0.3013, lr_0 = 4.1394e-04
Loss = 1.6330e-02, PNorm = 170.5106, GNorm = 0.3697, lr_0 = 4.1365e-04
Loss = 1.8560e-02, PNorm = 170.5345, GNorm = 0.2598, lr_0 = 4.1337e-04
Loss = 1.9518e-02, PNorm = 170.5594, GNorm = 0.4238, lr_0 = 4.1309e-04
Loss = 2.4908e-02, PNorm = 170.5786, GNorm = 0.3988, lr_0 = 4.1280e-04
Loss = 1.8931e-02, PNorm = 170.6026, GNorm = 0.4566, lr_0 = 4.1252e-04
Loss = 1.6033e-02, PNorm = 170.6309, GNorm = 0.1770, lr_0 = 4.1224e-04
Loss = 1.6703e-02, PNorm = 170.6588, GNorm = 0.1588, lr_0 = 4.1196e-04
Loss = 2.1256e-02, PNorm = 170.6849, GNorm = 0.2038, lr_0 = 4.1167e-04
Loss = 2.3610e-02, PNorm = 170.7083, GNorm = 0.3955, lr_0 = 4.1139e-04
Loss = 2.0776e-02, PNorm = 170.7356, GNorm = 0.4084, lr_0 = 4.1111e-04
Loss = 2.0060e-02, PNorm = 170.7651, GNorm = 0.3127, lr_0 = 4.1083e-04
Loss = 1.6317e-02, PNorm = 170.7924, GNorm = 0.2770, lr_0 = 4.1055e-04
Loss = 4.1139e-02, PNorm = 170.8210, GNorm = 0.2998, lr_0 = 4.1027e-04
Loss = 2.4785e-02, PNorm = 170.8441, GNorm = 0.3688, lr_0 = 4.0998e-04
Loss = 1.6811e-02, PNorm = 170.8702, GNorm = 0.2470, lr_0 = 4.0970e-04
Loss = 1.5485e-02, PNorm = 170.8922, GNorm = 0.2110, lr_0 = 4.0942e-04
Loss = 1.7921e-02, PNorm = 170.9177, GNorm = 0.3683, lr_0 = 4.0914e-04
Loss = 2.0760e-02, PNorm = 170.9400, GNorm = 0.4678, lr_0 = 4.0886e-04
Loss = 1.7987e-02, PNorm = 170.9645, GNorm = 0.2904, lr_0 = 4.0858e-04
Loss = 1.8895e-02, PNorm = 170.9870, GNorm = 0.2406, lr_0 = 4.0830e-04
Loss = 2.0665e-02, PNorm = 171.0095, GNorm = 0.2858, lr_0 = 4.0802e-04
Loss = 1.6925e-02, PNorm = 171.0331, GNorm = 0.5123, lr_0 = 4.0774e-04
Loss = 1.7178e-02, PNorm = 171.0610, GNorm = 0.3490, lr_0 = 4.0746e-04
Loss = 1.7607e-02, PNorm = 171.0862, GNorm = 0.4522, lr_0 = 4.0718e-04
Loss = 2.8085e-02, PNorm = 171.1067, GNorm = 0.4238, lr_0 = 4.0691e-04
Loss = 1.8717e-02, PNorm = 171.1278, GNorm = 0.5505, lr_0 = 4.0663e-04
Loss = 1.7132e-02, PNorm = 171.1533, GNorm = 0.2500, lr_0 = 4.0635e-04
Loss = 2.1958e-02, PNorm = 171.1775, GNorm = 0.2025, lr_0 = 4.0607e-04
Loss = 2.0780e-02, PNorm = 171.2003, GNorm = 0.3249, lr_0 = 4.0579e-04
Loss = 1.9064e-02, PNorm = 171.2275, GNorm = 0.2531, lr_0 = 4.0551e-04
Loss = 1.7992e-02, PNorm = 171.2500, GNorm = 0.2654, lr_0 = 4.0524e-04
Loss = 2.2715e-02, PNorm = 171.2737, GNorm = 0.6394, lr_0 = 4.0496e-04
Loss = 1.8694e-02, PNorm = 171.2931, GNorm = 0.2543, lr_0 = 4.0468e-04
Validation mae = 0.121711
Epoch 13
Loss = 2.7331e-02, PNorm = 171.3160, GNorm = 0.2790, lr_0 = 4.0440e-04
Loss = 1.8955e-02, PNorm = 171.3382, GNorm = 0.2833, lr_0 = 4.0413e-04
Loss = 1.7117e-02, PNorm = 171.3555, GNorm = 0.1740, lr_0 = 4.0385e-04
Loss = 1.8292e-02, PNorm = 171.3717, GNorm = 0.7834, lr_0 = 4.0357e-04
Loss = 2.4839e-02, PNorm = 171.3905, GNorm = 0.2866, lr_0 = 4.0330e-04
Loss = 1.3356e-02, PNorm = 171.4088, GNorm = 0.2014, lr_0 = 4.0302e-04
Loss = 1.4114e-02, PNorm = 171.4272, GNorm = 0.3247, lr_0 = 4.0274e-04
Loss = 1.9139e-02, PNorm = 171.4443, GNorm = 0.5196, lr_0 = 4.0247e-04
Loss = 1.5623e-02, PNorm = 171.4604, GNorm = 0.3621, lr_0 = 4.0219e-04
Loss = 1.3847e-02, PNorm = 171.4791, GNorm = 0.2455, lr_0 = 4.0192e-04
Loss = 1.6911e-02, PNorm = 171.4959, GNorm = 0.3388, lr_0 = 4.0164e-04
Loss = 1.6114e-02, PNorm = 171.5139, GNorm = 0.3642, lr_0 = 4.0137e-04
Loss = 1.8015e-02, PNorm = 171.5299, GNorm = 0.9379, lr_0 = 4.0109e-04
Loss = 1.7049e-02, PNorm = 171.5515, GNorm = 0.4802, lr_0 = 4.0082e-04
Loss = 1.4451e-02, PNorm = 171.5709, GNorm = 0.1390, lr_0 = 4.0054e-04
Loss = 1.7790e-02, PNorm = 171.5922, GNorm = 0.1403, lr_0 = 4.0027e-04
Loss = 1.9656e-02, PNorm = 171.6090, GNorm = 0.3831, lr_0 = 3.9999e-04
Loss = 1.7213e-02, PNorm = 171.6228, GNorm = 0.3066, lr_0 = 3.9972e-04
Loss = 1.4491e-02, PNorm = 171.6375, GNorm = 0.3233, lr_0 = 3.9945e-04
Loss = 1.5332e-02, PNorm = 171.6565, GNorm = 0.1899, lr_0 = 3.9917e-04
Loss = 1.4722e-02, PNorm = 171.6767, GNorm = 0.2455, lr_0 = 3.9890e-04
Loss = 1.7201e-02, PNorm = 171.6948, GNorm = 0.4707, lr_0 = 3.9863e-04
Loss = 1.9408e-02, PNorm = 171.7175, GNorm = 0.3517, lr_0 = 3.9835e-04
Loss = 1.7415e-02, PNorm = 171.7401, GNorm = 0.1435, lr_0 = 3.9808e-04
Loss = 1.1373e-02, PNorm = 171.7606, GNorm = 0.1715, lr_0 = 3.9781e-04
Loss = 1.5853e-02, PNorm = 171.7783, GNorm = 0.3221, lr_0 = 3.9753e-04
Loss = 1.6474e-02, PNorm = 171.7940, GNorm = 0.3649, lr_0 = 3.9726e-04
Loss = 1.4495e-02, PNorm = 171.8091, GNorm = 0.3321, lr_0 = 3.9699e-04
Loss = 1.7046e-02, PNorm = 171.8246, GNorm = 0.2248, lr_0 = 3.9672e-04
Loss = 1.3298e-02, PNorm = 171.8401, GNorm = 0.2373, lr_0 = 3.9645e-04
Loss = 1.2834e-02, PNorm = 171.8553, GNorm = 0.2087, lr_0 = 3.9617e-04
Loss = 1.5903e-02, PNorm = 171.8729, GNorm = 0.2897, lr_0 = 3.9590e-04
Loss = 1.6428e-02, PNorm = 171.8871, GNorm = 0.2165, lr_0 = 3.9563e-04
Loss = 1.8619e-02, PNorm = 171.9040, GNorm = 0.2083, lr_0 = 3.9536e-04
Loss = 1.5521e-02, PNorm = 171.9221, GNorm = 0.2921, lr_0 = 3.9509e-04
Loss = 1.8634e-02, PNorm = 171.9399, GNorm = 0.4068, lr_0 = 3.9482e-04
Loss = 1.9818e-02, PNorm = 171.9643, GNorm = 0.4153, lr_0 = 3.9455e-04
Loss = 1.7107e-02, PNorm = 171.9865, GNorm = 0.3718, lr_0 = 3.9428e-04
Loss = 1.3687e-02, PNorm = 172.0066, GNorm = 0.2621, lr_0 = 3.9401e-04
Loss = 2.1217e-02, PNorm = 172.0250, GNorm = 0.2743, lr_0 = 3.9374e-04
Loss = 2.3969e-02, PNorm = 172.0460, GNorm = 0.1793, lr_0 = 3.9347e-04
Loss = 1.5284e-02, PNorm = 172.0628, GNorm = 0.2499, lr_0 = 3.9320e-04
Loss = 1.7392e-02, PNorm = 172.0789, GNorm = 0.4503, lr_0 = 3.9293e-04
Loss = 1.2373e-02, PNorm = 172.0959, GNorm = 0.1523, lr_0 = 3.9266e-04
Loss = 1.3564e-02, PNorm = 172.1094, GNorm = 0.3806, lr_0 = 3.9239e-04
Loss = 1.3307e-02, PNorm = 172.1258, GNorm = 0.4061, lr_0 = 3.9212e-04
Loss = 1.9257e-02, PNorm = 172.1435, GNorm = 0.2239, lr_0 = 3.9185e-04
Loss = 2.6153e-02, PNorm = 172.1655, GNorm = 0.4797, lr_0 = 3.9159e-04
Loss = 1.5543e-02, PNorm = 172.1869, GNorm = 0.3512, lr_0 = 3.9132e-04
Loss = 2.1055e-02, PNorm = 172.2060, GNorm = 0.5200, lr_0 = 3.9105e-04
Loss = 2.7027e-02, PNorm = 172.2283, GNorm = 0.6014, lr_0 = 3.9078e-04
Loss = 1.2087e-02, PNorm = 172.2505, GNorm = 0.2770, lr_0 = 3.9051e-04
Loss = 1.9920e-02, PNorm = 172.2683, GNorm = 0.2887, lr_0 = 3.9025e-04
Loss = 1.6986e-02, PNorm = 172.2863, GNorm = 0.2444, lr_0 = 3.8998e-04
Loss = 1.6455e-02, PNorm = 172.3066, GNorm = 0.2889, lr_0 = 3.8971e-04
Loss = 2.4916e-02, PNorm = 172.3220, GNorm = 0.2822, lr_0 = 3.8945e-04
Loss = 1.2804e-02, PNorm = 172.3430, GNorm = 0.1290, lr_0 = 3.8918e-04
Loss = 1.6429e-02, PNorm = 172.3623, GNorm = 0.3786, lr_0 = 3.8891e-04
Loss = 1.5688e-02, PNorm = 172.3838, GNorm = 0.1801, lr_0 = 3.8865e-04
Loss = 1.9560e-02, PNorm = 172.4038, GNorm = 0.2177, lr_0 = 3.8838e-04
Loss = 1.6737e-02, PNorm = 172.4238, GNorm = 0.2510, lr_0 = 3.8811e-04
Loss = 1.6331e-02, PNorm = 172.4413, GNorm = 0.2207, lr_0 = 3.8785e-04
Loss = 1.7555e-02, PNorm = 172.4586, GNorm = 0.4748, lr_0 = 3.8758e-04
Loss = 3.0782e-02, PNorm = 172.4795, GNorm = 0.4909, lr_0 = 3.8732e-04
Loss = 1.6180e-02, PNorm = 172.4955, GNorm = 0.4452, lr_0 = 3.8705e-04
Loss = 1.9327e-02, PNorm = 172.5162, GNorm = 0.1860, lr_0 = 3.8679e-04
Loss = 1.2264e-02, PNorm = 172.5353, GNorm = 0.1652, lr_0 = 3.8652e-04
Loss = 1.6248e-02, PNorm = 172.5542, GNorm = 0.2084, lr_0 = 3.8626e-04
Loss = 1.5260e-02, PNorm = 172.5692, GNorm = 0.4015, lr_0 = 3.8599e-04
Loss = 2.2804e-02, PNorm = 172.5862, GNorm = 0.2205, lr_0 = 3.8573e-04
Loss = 1.5587e-02, PNorm = 172.6044, GNorm = 0.2728, lr_0 = 3.8546e-04
Loss = 1.8422e-02, PNorm = 172.6272, GNorm = 0.2247, lr_0 = 3.8520e-04
Loss = 1.7304e-02, PNorm = 172.6481, GNorm = 0.3913, lr_0 = 3.8493e-04
Loss = 1.3019e-02, PNorm = 172.6649, GNorm = 0.3153, lr_0 = 3.8467e-04
Loss = 1.5092e-02, PNorm = 172.6835, GNorm = 0.3369, lr_0 = 3.8441e-04
Loss = 1.2009e-02, PNorm = 172.7024, GNorm = 0.3193, lr_0 = 3.8414e-04
Loss = 1.3384e-02, PNorm = 172.7198, GNorm = 0.1663, lr_0 = 3.8388e-04
Loss = 1.4229e-02, PNorm = 172.7380, GNorm = 0.2600, lr_0 = 3.8362e-04
Loss = 1.7752e-02, PNorm = 172.7566, GNorm = 0.1849, lr_0 = 3.8336e-04
Loss = 1.3946e-02, PNorm = 172.7763, GNorm = 0.2628, lr_0 = 3.8309e-04
Loss = 1.9133e-02, PNorm = 172.7941, GNorm = 0.2981, lr_0 = 3.8283e-04
Loss = 1.4266e-02, PNorm = 172.8078, GNorm = 0.1616, lr_0 = 3.8257e-04
Loss = 1.7833e-02, PNorm = 172.8277, GNorm = 0.4762, lr_0 = 3.8231e-04
Loss = 1.5558e-02, PNorm = 172.8513, GNorm = 0.2760, lr_0 = 3.8204e-04
Loss = 2.2205e-02, PNorm = 172.8716, GNorm = 0.4403, lr_0 = 3.8178e-04
Loss = 1.9039e-02, PNorm = 172.8914, GNorm = 0.3460, lr_0 = 3.8152e-04
Loss = 2.1821e-02, PNorm = 172.9125, GNorm = 0.3862, lr_0 = 3.8126e-04
Loss = 1.2372e-02, PNorm = 172.9339, GNorm = 0.2333, lr_0 = 3.8100e-04
Loss = 1.4313e-02, PNorm = 172.9524, GNorm = 0.2179, lr_0 = 3.8074e-04
Loss = 1.3066e-02, PNorm = 172.9711, GNorm = 0.1971, lr_0 = 3.8048e-04
Loss = 1.8064e-02, PNorm = 172.9868, GNorm = 0.1427, lr_0 = 3.8022e-04
Loss = 1.5636e-02, PNorm = 173.0048, GNorm = 0.1698, lr_0 = 3.7995e-04
Loss = 1.2629e-02, PNorm = 173.0248, GNorm = 0.1245, lr_0 = 3.7969e-04
Loss = 1.6543e-02, PNorm = 173.0470, GNorm = 0.3088, lr_0 = 3.7943e-04
Loss = 1.6403e-02, PNorm = 173.0654, GNorm = 0.2265, lr_0 = 3.7917e-04
Loss = 1.3940e-02, PNorm = 173.0805, GNorm = 0.2322, lr_0 = 3.7891e-04
Loss = 1.1660e-02, PNorm = 173.0967, GNorm = 0.2873, lr_0 = 3.7866e-04
Loss = 1.4619e-02, PNorm = 173.1153, GNorm = 0.2010, lr_0 = 3.7840e-04
Loss = 1.6194e-02, PNorm = 173.1347, GNorm = 0.2129, lr_0 = 3.7814e-04
Loss = 1.5553e-02, PNorm = 173.1529, GNorm = 0.2608, lr_0 = 3.7788e-04
Loss = 1.3420e-02, PNorm = 173.1711, GNorm = 0.1779, lr_0 = 3.7762e-04
Loss = 1.4945e-02, PNorm = 173.1898, GNorm = 0.1778, lr_0 = 3.7736e-04
Loss = 1.1835e-02, PNorm = 173.2081, GNorm = 0.1817, lr_0 = 3.7710e-04
Loss = 1.5942e-02, PNorm = 173.2258, GNorm = 0.2207, lr_0 = 3.7684e-04
Loss = 1.6530e-02, PNorm = 173.2467, GNorm = 0.2354, lr_0 = 3.7659e-04
Loss = 1.4691e-02, PNorm = 173.2659, GNorm = 0.1738, lr_0 = 3.7633e-04
Loss = 1.6756e-02, PNorm = 173.2847, GNorm = 0.2536, lr_0 = 3.7607e-04
Loss = 1.6639e-02, PNorm = 173.3041, GNorm = 0.1920, lr_0 = 3.7581e-04
Loss = 1.3104e-02, PNorm = 173.3241, GNorm = 0.4489, lr_0 = 3.7555e-04
Loss = 1.8278e-02, PNorm = 173.3457, GNorm = 0.1858, lr_0 = 3.7530e-04
Loss = 1.3838e-02, PNorm = 173.3659, GNorm = 0.1197, lr_0 = 3.7504e-04
Loss = 2.1882e-02, PNorm = 173.3838, GNorm = 0.5574, lr_0 = 3.7478e-04
Loss = 1.6104e-02, PNorm = 173.4051, GNorm = 0.2733, lr_0 = 3.7453e-04
Loss = 1.8838e-02, PNorm = 173.4274, GNorm = 0.1297, lr_0 = 3.7427e-04
Loss = 1.7969e-02, PNorm = 173.4481, GNorm = 0.3564, lr_0 = 3.7401e-04
Loss = 1.5219e-02, PNorm = 173.4680, GNorm = 0.2823, lr_0 = 3.7376e-04
Loss = 1.6143e-02, PNorm = 173.4865, GNorm = 0.1920, lr_0 = 3.7350e-04
Loss = 1.4826e-02, PNorm = 173.5054, GNorm = 0.4232, lr_0 = 3.7325e-04
Loss = 1.3981e-02, PNorm = 173.5234, GNorm = 0.2837, lr_0 = 3.7299e-04
Loss = 1.1930e-02, PNorm = 173.5395, GNorm = 0.2935, lr_0 = 3.7273e-04
Validation mae = 0.121298
Epoch 14
Loss = 1.4639e-02, PNorm = 173.5544, GNorm = 0.1694, lr_0 = 3.7248e-04
Loss = 1.3980e-02, PNorm = 173.5686, GNorm = 0.2295, lr_0 = 3.7222e-04
Loss = 1.1393e-02, PNorm = 173.5839, GNorm = 0.4091, lr_0 = 3.7197e-04
Loss = 1.7694e-02, PNorm = 173.5978, GNorm = 0.3851, lr_0 = 3.7171e-04
Loss = 1.3837e-02, PNorm = 173.6099, GNorm = 0.2848, lr_0 = 3.7146e-04
Loss = 1.1973e-02, PNorm = 173.6249, GNorm = 0.1817, lr_0 = 3.7120e-04
Loss = 1.9159e-02, PNorm = 173.6407, GNorm = 0.4166, lr_0 = 3.7095e-04
Loss = 1.5390e-02, PNorm = 173.6529, GNorm = 0.1658, lr_0 = 3.7070e-04
Loss = 1.3251e-02, PNorm = 173.6681, GNorm = 0.2670, lr_0 = 3.7044e-04
Loss = 1.0186e-02, PNorm = 173.6811, GNorm = 0.3077, lr_0 = 3.7019e-04
Loss = 1.1365e-02, PNorm = 173.6909, GNorm = 0.1361, lr_0 = 3.6993e-04
Loss = 1.8438e-02, PNorm = 173.7032, GNorm = 0.4461, lr_0 = 3.6968e-04
Loss = 1.1839e-02, PNorm = 173.7174, GNorm = 0.5684, lr_0 = 3.6943e-04
Loss = 1.7882e-02, PNorm = 173.7295, GNorm = 0.5530, lr_0 = 3.6917e-04
Loss = 1.6999e-02, PNorm = 173.7434, GNorm = 0.3747, lr_0 = 3.6892e-04
Loss = 1.3377e-02, PNorm = 173.7564, GNorm = 0.1842, lr_0 = 3.6867e-04
Loss = 1.3798e-02, PNorm = 173.7744, GNorm = 0.7680, lr_0 = 3.6842e-04
Loss = 1.1671e-02, PNorm = 173.7897, GNorm = 0.4242, lr_0 = 3.6816e-04
Loss = 1.1724e-02, PNorm = 173.8021, GNorm = 0.2997, lr_0 = 3.6791e-04
Loss = 1.1018e-02, PNorm = 173.8132, GNorm = 0.1381, lr_0 = 3.6766e-04
Loss = 1.1326e-02, PNorm = 173.8260, GNorm = 0.2472, lr_0 = 3.6741e-04
Loss = 1.2474e-02, PNorm = 173.8389, GNorm = 0.4815, lr_0 = 3.6716e-04
Loss = 1.2752e-02, PNorm = 173.8504, GNorm = 0.1272, lr_0 = 3.6690e-04
Loss = 1.9675e-02, PNorm = 173.8645, GNorm = 0.6138, lr_0 = 3.6665e-04
Loss = 1.4641e-02, PNorm = 173.8785, GNorm = 0.3098, lr_0 = 3.6640e-04
Loss = 1.6016e-02, PNorm = 173.8926, GNorm = 0.1715, lr_0 = 3.6615e-04
Loss = 1.6200e-02, PNorm = 173.9074, GNorm = 0.3965, lr_0 = 3.6590e-04
Loss = 1.9377e-02, PNorm = 173.9221, GNorm = 0.3552, lr_0 = 3.6565e-04
Loss = 1.4606e-02, PNorm = 173.9379, GNorm = 0.1743, lr_0 = 3.6540e-04
Loss = 1.1366e-02, PNorm = 173.9533, GNorm = 0.2470, lr_0 = 3.6515e-04
Loss = 1.3220e-02, PNorm = 173.9701, GNorm = 0.2483, lr_0 = 3.6490e-04
Loss = 1.2258e-02, PNorm = 173.9867, GNorm = 0.6144, lr_0 = 3.6465e-04
Loss = 1.2379e-02, PNorm = 174.0012, GNorm = 0.4248, lr_0 = 3.6440e-04
Loss = 1.0605e-02, PNorm = 174.0140, GNorm = 0.1222, lr_0 = 3.6415e-04
Loss = 1.0313e-02, PNorm = 174.0251, GNorm = 0.3032, lr_0 = 3.6390e-04
Loss = 1.4843e-02, PNorm = 174.0359, GNorm = 0.6488, lr_0 = 3.6365e-04
Loss = 1.0640e-02, PNorm = 174.0527, GNorm = 0.1660, lr_0 = 3.6340e-04
Loss = 1.6759e-02, PNorm = 174.0659, GNorm = 0.1870, lr_0 = 3.6315e-04
Loss = 1.3509e-02, PNorm = 174.0801, GNorm = 0.1343, lr_0 = 3.6290e-04
Loss = 9.9703e-03, PNorm = 174.0907, GNorm = 0.3177, lr_0 = 3.6266e-04
Loss = 1.3432e-02, PNorm = 174.1042, GNorm = 0.2589, lr_0 = 3.6241e-04
Loss = 2.1970e-02, PNorm = 174.1179, GNorm = 0.2817, lr_0 = 3.6216e-04
Loss = 1.4232e-02, PNorm = 174.1297, GNorm = 0.2084, lr_0 = 3.6191e-04
Loss = 1.7422e-02, PNorm = 174.1435, GNorm = 0.2220, lr_0 = 3.6166e-04
Loss = 1.2437e-02, PNorm = 174.1548, GNorm = 0.1985, lr_0 = 3.6141e-04
Loss = 1.3849e-02, PNorm = 174.1692, GNorm = 0.1493, lr_0 = 3.6117e-04
Loss = 1.1188e-02, PNorm = 174.1859, GNorm = 0.3347, lr_0 = 3.6092e-04
Loss = 1.9476e-02, PNorm = 174.2059, GNorm = 0.2555, lr_0 = 3.6067e-04
Loss = 2.6638e-02, PNorm = 174.2248, GNorm = 0.1674, lr_0 = 3.6043e-04
Loss = 1.3591e-02, PNorm = 174.2422, GNorm = 0.2936, lr_0 = 3.6018e-04
Loss = 1.5046e-02, PNorm = 174.2552, GNorm = 0.2495, lr_0 = 3.5993e-04
Loss = 1.4184e-02, PNorm = 174.2706, GNorm = 0.2845, lr_0 = 3.5969e-04
Loss = 1.5236e-02, PNorm = 174.2872, GNorm = 0.1629, lr_0 = 3.5944e-04
Loss = 1.2531e-02, PNorm = 174.3055, GNorm = 0.2346, lr_0 = 3.5919e-04
Loss = 1.4030e-02, PNorm = 174.3200, GNorm = 0.3026, lr_0 = 3.5895e-04
Loss = 1.9360e-02, PNorm = 174.3387, GNorm = 0.2901, lr_0 = 3.5870e-04
Loss = 1.7742e-02, PNorm = 174.3553, GNorm = 0.2562, lr_0 = 3.5845e-04
Loss = 1.0993e-02, PNorm = 174.3703, GNorm = 0.1649, lr_0 = 3.5821e-04
Loss = 1.2861e-02, PNorm = 174.3858, GNorm = 0.1719, lr_0 = 3.5796e-04
Loss = 1.2566e-02, PNorm = 174.3996, GNorm = 0.3256, lr_0 = 3.5772e-04
Loss = 2.0406e-02, PNorm = 174.4166, GNorm = 0.3779, lr_0 = 3.5747e-04
Loss = 1.3442e-02, PNorm = 174.4345, GNorm = 0.3234, lr_0 = 3.5723e-04
Loss = 1.1833e-02, PNorm = 174.4478, GNorm = 0.2580, lr_0 = 3.5698e-04
Loss = 1.6719e-02, PNorm = 174.4640, GNorm = 0.2762, lr_0 = 3.5674e-04
Loss = 1.3911e-02, PNorm = 174.4821, GNorm = 0.2106, lr_0 = 3.5650e-04
Loss = 1.3155e-02, PNorm = 174.4988, GNorm = 0.3208, lr_0 = 3.5625e-04
Loss = 1.2175e-02, PNorm = 174.5159, GNorm = 0.2836, lr_0 = 3.5601e-04
Loss = 1.2730e-02, PNorm = 174.5315, GNorm = 0.1515, lr_0 = 3.5576e-04
Loss = 1.0660e-02, PNorm = 174.5454, GNorm = 0.1781, lr_0 = 3.5552e-04
Loss = 2.1684e-02, PNorm = 174.5605, GNorm = 0.2870, lr_0 = 3.5528e-04
Loss = 1.3285e-02, PNorm = 174.5741, GNorm = 0.4322, lr_0 = 3.5503e-04
Loss = 1.2170e-02, PNorm = 174.5900, GNorm = 0.2852, lr_0 = 3.5479e-04
Loss = 1.1310e-02, PNorm = 174.6052, GNorm = 0.2020, lr_0 = 3.5455e-04
Loss = 1.5572e-02, PNorm = 174.6169, GNorm = 0.1445, lr_0 = 3.5430e-04
Loss = 1.6682e-02, PNorm = 174.6324, GNorm = 0.3019, lr_0 = 3.5406e-04
Loss = 1.5079e-02, PNorm = 174.6467, GNorm = 0.3866, lr_0 = 3.5382e-04
Loss = 1.4762e-02, PNorm = 174.6695, GNorm = 0.3850, lr_0 = 3.5358e-04
Loss = 1.6744e-02, PNorm = 174.6865, GNorm = 0.4386, lr_0 = 3.5333e-04
Loss = 1.4161e-02, PNorm = 174.7044, GNorm = 0.1377, lr_0 = 3.5309e-04
Loss = 1.8348e-02, PNorm = 174.7215, GNorm = 0.2358, lr_0 = 3.5285e-04
Loss = 1.7396e-02, PNorm = 174.7378, GNorm = 0.4137, lr_0 = 3.5261e-04
Loss = 1.0634e-02, PNorm = 174.7557, GNorm = 0.2330, lr_0 = 3.5237e-04
Loss = 1.3491e-02, PNorm = 174.7725, GNorm = 0.2111, lr_0 = 3.5212e-04
Loss = 1.3276e-02, PNorm = 174.7882, GNorm = 0.3606, lr_0 = 3.5188e-04
Loss = 1.2585e-02, PNorm = 174.8019, GNorm = 0.1357, lr_0 = 3.5164e-04
Loss = 1.3992e-02, PNorm = 174.8178, GNorm = 0.2040, lr_0 = 3.5140e-04
Loss = 1.1938e-02, PNorm = 174.8370, GNorm = 0.2430, lr_0 = 3.5116e-04
Loss = 1.2000e-02, PNorm = 174.8512, GNorm = 0.2194, lr_0 = 3.5092e-04
Loss = 1.4637e-02, PNorm = 174.8703, GNorm = 0.2475, lr_0 = 3.5068e-04
Loss = 1.7965e-02, PNorm = 174.8856, GNorm = 0.1795, lr_0 = 3.5044e-04
Loss = 1.2795e-02, PNorm = 174.9046, GNorm = 0.4884, lr_0 = 3.5020e-04
Loss = 1.2802e-02, PNorm = 174.9244, GNorm = 0.2790, lr_0 = 3.4996e-04
Loss = 1.0904e-02, PNorm = 174.9432, GNorm = 0.3549, lr_0 = 3.4972e-04
Loss = 1.1576e-02, PNorm = 174.9608, GNorm = 0.1853, lr_0 = 3.4948e-04
Loss = 1.5355e-02, PNorm = 174.9765, GNorm = 0.2017, lr_0 = 3.4924e-04
Loss = 1.3073e-02, PNorm = 174.9944, GNorm = 0.4546, lr_0 = 3.4900e-04
Loss = 9.5241e-03, PNorm = 175.0092, GNorm = 0.1307, lr_0 = 3.4876e-04
Loss = 1.2395e-02, PNorm = 175.0263, GNorm = 0.2008, lr_0 = 3.4852e-04
Loss = 1.8899e-02, PNorm = 175.0430, GNorm = 0.1467, lr_0 = 3.4828e-04
Loss = 1.3220e-02, PNorm = 175.0628, GNorm = 0.1492, lr_0 = 3.4805e-04
Loss = 1.2567e-02, PNorm = 175.0805, GNorm = 0.2952, lr_0 = 3.4781e-04
Loss = 1.1621e-02, PNorm = 175.0989, GNorm = 0.2218, lr_0 = 3.4757e-04
Loss = 1.2877e-02, PNorm = 175.1166, GNorm = 0.1208, lr_0 = 3.4733e-04
Loss = 1.7537e-02, PNorm = 175.1325, GNorm = 0.5868, lr_0 = 3.4709e-04
Loss = 2.0497e-02, PNorm = 175.1475, GNorm = 0.3301, lr_0 = 3.4686e-04
Loss = 1.1007e-02, PNorm = 175.1673, GNorm = 0.1790, lr_0 = 3.4662e-04
Loss = 1.0362e-02, PNorm = 175.1842, GNorm = 0.1678, lr_0 = 3.4638e-04
Loss = 1.4238e-02, PNorm = 175.2001, GNorm = 0.1934, lr_0 = 3.4614e-04
Loss = 1.3984e-02, PNorm = 175.2176, GNorm = 0.3786, lr_0 = 3.4591e-04
Loss = 1.3182e-02, PNorm = 175.2340, GNorm = 0.3292, lr_0 = 3.4567e-04
Loss = 1.2385e-02, PNorm = 175.2503, GNorm = 0.4598, lr_0 = 3.4543e-04
Loss = 1.8242e-02, PNorm = 175.2660, GNorm = 0.1936, lr_0 = 3.4520e-04
Loss = 1.3653e-02, PNorm = 175.2830, GNorm = 0.3520, lr_0 = 3.4496e-04
Loss = 1.1727e-02, PNorm = 175.3032, GNorm = 0.1972, lr_0 = 3.4472e-04
Loss = 1.2999e-02, PNorm = 175.3210, GNorm = 0.4019, lr_0 = 3.4449e-04
Loss = 1.5870e-02, PNorm = 175.3405, GNorm = 0.3101, lr_0 = 3.4425e-04
Loss = 1.0929e-02, PNorm = 175.3577, GNorm = 0.1931, lr_0 = 3.4402e-04
Loss = 1.8418e-02, PNorm = 175.3746, GNorm = 0.2568, lr_0 = 3.4378e-04
Loss = 1.3647e-02, PNorm = 175.3906, GNorm = 0.3315, lr_0 = 3.4354e-04
Loss = 1.4117e-02, PNorm = 175.4103, GNorm = 0.1676, lr_0 = 3.4331e-04
Validation mae = 0.121389
Epoch 15
Loss = 1.3868e-02, PNorm = 175.4273, GNorm = 0.1692, lr_0 = 3.4307e-04
Loss = 1.2017e-02, PNorm = 175.4420, GNorm = 0.1484, lr_0 = 3.4284e-04
Loss = 1.5731e-02, PNorm = 175.4543, GNorm = 0.2915, lr_0 = 3.4260e-04
Loss = 1.5563e-02, PNorm = 175.4637, GNorm = 0.2572, lr_0 = 3.4237e-04
Loss = 1.0617e-02, PNorm = 175.4746, GNorm = 0.1672, lr_0 = 3.4213e-04
Loss = 1.3245e-02, PNorm = 175.4855, GNorm = 0.2433, lr_0 = 3.4190e-04
Loss = 1.0065e-02, PNorm = 175.4935, GNorm = 0.2247, lr_0 = 3.4167e-04
Loss = 2.4043e-02, PNorm = 175.5051, GNorm = 0.1794, lr_0 = 3.4143e-04
Loss = 1.2814e-02, PNorm = 175.5166, GNorm = 0.1892, lr_0 = 3.4120e-04
Loss = 1.0196e-02, PNorm = 175.5275, GNorm = 0.3550, lr_0 = 3.4096e-04
Loss = 1.6362e-02, PNorm = 175.5388, GNorm = 0.1547, lr_0 = 3.4073e-04
Loss = 1.2181e-02, PNorm = 175.5529, GNorm = 0.2219, lr_0 = 3.4050e-04
Loss = 1.7509e-02, PNorm = 175.5640, GNorm = 0.1854, lr_0 = 3.4026e-04
Loss = 1.1140e-02, PNorm = 175.5738, GNorm = 0.5002, lr_0 = 3.4003e-04
Loss = 1.1665e-02, PNorm = 175.5862, GNorm = 0.1677, lr_0 = 3.3980e-04
Loss = 1.2908e-02, PNorm = 175.5994, GNorm = 0.2338, lr_0 = 3.3956e-04
Loss = 9.7519e-03, PNorm = 175.6128, GNorm = 0.1550, lr_0 = 3.3933e-04
Loss = 1.1814e-02, PNorm = 175.6220, GNorm = 0.3268, lr_0 = 3.3910e-04
Loss = 1.0167e-02, PNorm = 175.6348, GNorm = 0.3838, lr_0 = 3.3887e-04
Loss = 1.4302e-02, PNorm = 175.6453, GNorm = 0.1735, lr_0 = 3.3864e-04
Loss = 1.5445e-02, PNorm = 175.6591, GNorm = 0.2575, lr_0 = 3.3840e-04
Loss = 1.4418e-02, PNorm = 175.6706, GNorm = 0.9979, lr_0 = 3.3817e-04
Loss = 1.1997e-02, PNorm = 175.6797, GNorm = 0.1832, lr_0 = 3.3794e-04
Loss = 1.1843e-02, PNorm = 175.6919, GNorm = 0.3123, lr_0 = 3.3771e-04
Loss = 1.0707e-02, PNorm = 175.7019, GNorm = 0.1375, lr_0 = 3.3748e-04
Loss = 1.6921e-02, PNorm = 175.7135, GNorm = 0.3004, lr_0 = 3.3725e-04
Loss = 1.3339e-02, PNorm = 175.7280, GNorm = 0.2197, lr_0 = 3.3701e-04
Loss = 1.0111e-02, PNorm = 175.7390, GNorm = 0.1499, lr_0 = 3.3678e-04
Loss = 9.5975e-03, PNorm = 175.7508, GNorm = 0.1217, lr_0 = 3.3655e-04
Loss = 1.3355e-02, PNorm = 175.7624, GNorm = 0.1415, lr_0 = 3.3632e-04
Loss = 1.0490e-02, PNorm = 175.7735, GNorm = 0.1979, lr_0 = 3.3609e-04
Loss = 1.1636e-02, PNorm = 175.7875, GNorm = 0.1585, lr_0 = 3.3586e-04
Loss = 1.6792e-02, PNorm = 175.8004, GNorm = 0.1592, lr_0 = 3.3563e-04
Loss = 1.3784e-02, PNorm = 175.8128, GNorm = 0.3256, lr_0 = 3.3540e-04
Loss = 1.0317e-02, PNorm = 175.8247, GNorm = 0.2403, lr_0 = 3.3517e-04
Loss = 1.2046e-02, PNorm = 175.8407, GNorm = 0.2903, lr_0 = 3.3494e-04
Loss = 1.0066e-02, PNorm = 175.8567, GNorm = 0.2565, lr_0 = 3.3471e-04
Loss = 9.4783e-03, PNorm = 175.8696, GNorm = 0.1385, lr_0 = 3.3448e-04
Loss = 1.4056e-02, PNorm = 175.8801, GNorm = 0.2092, lr_0 = 3.3425e-04
Loss = 1.1428e-02, PNorm = 175.8924, GNorm = 0.2136, lr_0 = 3.3403e-04
Loss = 1.1443e-02, PNorm = 175.9037, GNorm = 0.2409, lr_0 = 3.3380e-04
Loss = 1.1283e-02, PNorm = 175.9118, GNorm = 0.2753, lr_0 = 3.3357e-04
Loss = 1.4366e-02, PNorm = 175.9213, GNorm = 0.2834, lr_0 = 3.3334e-04
Loss = 1.2845e-02, PNorm = 175.9324, GNorm = 0.2196, lr_0 = 3.3311e-04
Loss = 1.3403e-02, PNorm = 175.9432, GNorm = 0.4484, lr_0 = 3.3288e-04
Loss = 1.2216e-02, PNorm = 175.9551, GNorm = 0.2459, lr_0 = 3.3265e-04
Loss = 1.0610e-02, PNorm = 175.9683, GNorm = 0.3359, lr_0 = 3.3243e-04
Loss = 1.0392e-02, PNorm = 175.9797, GNorm = 0.3399, lr_0 = 3.3220e-04
Loss = 1.4567e-02, PNorm = 175.9935, GNorm = 0.1988, lr_0 = 3.3197e-04
Loss = 9.8687e-03, PNorm = 176.0038, GNorm = 0.0930, lr_0 = 3.3174e-04
Loss = 1.3242e-02, PNorm = 176.0163, GNorm = 0.3323, lr_0 = 3.3152e-04
Loss = 1.0146e-02, PNorm = 176.0320, GNorm = 0.1967, lr_0 = 3.3129e-04
Loss = 1.2182e-02, PNorm = 176.0462, GNorm = 0.1825, lr_0 = 3.3106e-04
Loss = 1.6006e-02, PNorm = 176.0613, GNorm = 0.3318, lr_0 = 3.3084e-04
Loss = 1.5916e-02, PNorm = 176.0762, GNorm = 0.2305, lr_0 = 3.3061e-04
Loss = 9.1180e-03, PNorm = 176.0916, GNorm = 0.2390, lr_0 = 3.3038e-04
Loss = 1.5579e-02, PNorm = 176.1031, GNorm = 0.3055, lr_0 = 3.3016e-04
Loss = 1.2636e-02, PNorm = 176.1192, GNorm = 0.2268, lr_0 = 3.2993e-04
Loss = 1.0985e-02, PNorm = 176.1333, GNorm = 0.2469, lr_0 = 3.2970e-04
Loss = 1.3185e-02, PNorm = 176.1465, GNorm = 0.2363, lr_0 = 3.2948e-04
Loss = 1.1036e-02, PNorm = 176.1613, GNorm = 0.2352, lr_0 = 3.2925e-04
Loss = 1.2334e-02, PNorm = 176.1767, GNorm = 0.4193, lr_0 = 3.2903e-04
Loss = 1.2864e-02, PNorm = 176.1937, GNorm = 0.4261, lr_0 = 3.2880e-04
Loss = 1.1773e-02, PNorm = 176.2068, GNorm = 0.4190, lr_0 = 3.2858e-04
Loss = 1.2336e-02, PNorm = 176.2254, GNorm = 0.1441, lr_0 = 3.2835e-04
Loss = 1.2647e-02, PNorm = 176.2419, GNorm = 0.2756, lr_0 = 3.2813e-04
Loss = 1.7388e-02, PNorm = 176.2569, GNorm = 0.1977, lr_0 = 3.2790e-04
Loss = 1.1949e-02, PNorm = 176.2698, GNorm = 0.4591, lr_0 = 3.2768e-04
Loss = 1.2922e-02, PNorm = 176.2808, GNorm = 0.1757, lr_0 = 3.2745e-04
Loss = 1.0772e-02, PNorm = 176.2941, GNorm = 0.1486, lr_0 = 3.2723e-04
Loss = 1.1342e-02, PNorm = 176.3060, GNorm = 0.4154, lr_0 = 3.2700e-04
Loss = 1.0250e-02, PNorm = 176.3218, GNorm = 0.3371, lr_0 = 3.2678e-04
Loss = 1.2840e-02, PNorm = 176.3379, GNorm = 0.2019, lr_0 = 3.2656e-04
Loss = 1.0229e-02, PNorm = 176.3533, GNorm = 0.2094, lr_0 = 3.2633e-04
Loss = 1.2641e-02, PNorm = 176.3698, GNorm = 0.1460, lr_0 = 3.2611e-04
Loss = 9.9278e-03, PNorm = 176.3853, GNorm = 0.1379, lr_0 = 3.2589e-04
Loss = 7.6280e-03, PNorm = 176.3993, GNorm = 0.2769, lr_0 = 3.2566e-04
Loss = 1.2723e-02, PNorm = 176.4083, GNorm = 0.2646, lr_0 = 3.2544e-04
Loss = 1.0280e-02, PNorm = 176.4181, GNorm = 0.4061, lr_0 = 3.2522e-04
Loss = 1.1297e-02, PNorm = 176.4320, GNorm = 0.5259, lr_0 = 3.2499e-04
Loss = 9.3923e-03, PNorm = 176.4472, GNorm = 0.2228, lr_0 = 3.2477e-04
Loss = 1.0436e-02, PNorm = 176.4586, GNorm = 0.2074, lr_0 = 3.2455e-04
Loss = 1.0467e-02, PNorm = 176.4708, GNorm = 0.3248, lr_0 = 3.2433e-04
Loss = 1.0616e-02, PNorm = 176.4841, GNorm = 0.4963, lr_0 = 3.2410e-04
Loss = 1.1404e-02, PNorm = 176.4980, GNorm = 0.1880, lr_0 = 3.2388e-04
Loss = 1.4537e-02, PNorm = 176.5103, GNorm = 0.2164, lr_0 = 3.2366e-04
Loss = 1.2038e-02, PNorm = 176.5248, GNorm = 0.2153, lr_0 = 3.2344e-04
Loss = 9.7208e-03, PNorm = 176.5387, GNorm = 0.4746, lr_0 = 3.2322e-04
Loss = 1.3308e-02, PNorm = 176.5508, GNorm = 0.2883, lr_0 = 3.2300e-04
Loss = 1.6649e-02, PNorm = 176.5600, GNorm = 0.1415, lr_0 = 3.2277e-04
Loss = 1.0132e-02, PNorm = 176.5711, GNorm = 0.1902, lr_0 = 3.2255e-04
Loss = 2.1516e-02, PNorm = 176.5828, GNorm = 0.2721, lr_0 = 3.2233e-04
Loss = 1.2957e-02, PNorm = 176.5968, GNorm = 0.2905, lr_0 = 3.2211e-04
Loss = 1.2657e-02, PNorm = 176.6106, GNorm = 0.1777, lr_0 = 3.2189e-04
Loss = 1.2392e-02, PNorm = 176.6269, GNorm = 0.1817, lr_0 = 3.2167e-04
Loss = 1.7630e-02, PNorm = 176.6422, GNorm = 0.1444, lr_0 = 3.2145e-04
Loss = 1.0737e-02, PNorm = 176.6588, GNorm = 0.1049, lr_0 = 3.2123e-04
Loss = 1.4578e-02, PNorm = 176.6748, GNorm = 0.3164, lr_0 = 3.2101e-04
Loss = 9.3952e-03, PNorm = 176.6860, GNorm = 0.2304, lr_0 = 3.2079e-04
Loss = 1.1056e-02, PNorm = 176.6991, GNorm = 0.1217, lr_0 = 3.2057e-04
Loss = 1.3433e-02, PNorm = 176.7148, GNorm = 0.4166, lr_0 = 3.2035e-04
Loss = 1.8964e-02, PNorm = 176.7300, GNorm = 0.2727, lr_0 = 3.2013e-04
Loss = 1.1615e-02, PNorm = 176.7475, GNorm = 0.3021, lr_0 = 3.1991e-04
Loss = 1.0793e-02, PNorm = 176.7615, GNorm = 0.5678, lr_0 = 3.1969e-04
Loss = 1.4559e-02, PNorm = 176.7737, GNorm = 0.2197, lr_0 = 3.1947e-04
Loss = 1.3430e-02, PNorm = 176.7902, GNorm = 0.2439, lr_0 = 3.1925e-04
Loss = 1.0178e-02, PNorm = 176.8062, GNorm = 0.2614, lr_0 = 3.1904e-04
Loss = 1.0119e-02, PNorm = 176.8212, GNorm = 0.1615, lr_0 = 3.1882e-04
Loss = 1.6985e-02, PNorm = 176.8351, GNorm = 0.1734, lr_0 = 3.1860e-04
Loss = 1.4730e-02, PNorm = 176.8500, GNorm = 0.2782, lr_0 = 3.1838e-04
Loss = 9.6796e-03, PNorm = 176.8642, GNorm = 0.1832, lr_0 = 3.1816e-04
Loss = 1.1906e-02, PNorm = 176.8778, GNorm = 0.1345, lr_0 = 3.1794e-04
Loss = 1.2967e-02, PNorm = 176.8876, GNorm = 0.3419, lr_0 = 3.1773e-04
Loss = 1.1986e-02, PNorm = 176.9004, GNorm = 0.1609, lr_0 = 3.1751e-04
Loss = 1.5303e-02, PNorm = 176.9140, GNorm = 0.2011, lr_0 = 3.1729e-04
Loss = 1.1374e-02, PNorm = 176.9264, GNorm = 0.2407, lr_0 = 3.1707e-04
Loss = 1.1373e-02, PNorm = 176.9382, GNorm = 0.1373, lr_0 = 3.1686e-04
Loss = 1.0387e-02, PNorm = 176.9481, GNorm = 0.3731, lr_0 = 3.1664e-04
Loss = 1.0603e-02, PNorm = 176.9629, GNorm = 0.5938, lr_0 = 3.1642e-04
Loss = 9.0607e-03, PNorm = 176.9750, GNorm = 0.2863, lr_0 = 3.1621e-04
Validation mae = 0.121106
Epoch 16
Loss = 9.8219e-03, PNorm = 176.9867, GNorm = 0.2184, lr_0 = 3.1599e-04
Loss = 9.4400e-03, PNorm = 176.9987, GNorm = 0.2538, lr_0 = 3.1577e-04
Loss = 2.7455e-02, PNorm = 177.0098, GNorm = 2.6262, lr_0 = 3.1556e-04
Loss = 9.6349e-03, PNorm = 177.0228, GNorm = 0.2101, lr_0 = 3.1534e-04
Loss = 1.5596e-02, PNorm = 177.0345, GNorm = 0.2486, lr_0 = 3.1512e-04
Loss = 9.4398e-03, PNorm = 177.0463, GNorm = 0.3383, lr_0 = 3.1491e-04
Loss = 1.3230e-02, PNorm = 177.0555, GNorm = 0.2167, lr_0 = 3.1469e-04
Loss = 1.2387e-02, PNorm = 177.0669, GNorm = 0.1916, lr_0 = 3.1448e-04
Loss = 1.1841e-02, PNorm = 177.0776, GNorm = 0.2796, lr_0 = 3.1426e-04
Loss = 8.0331e-03, PNorm = 177.0875, GNorm = 0.0963, lr_0 = 3.1405e-04
Loss = 1.4736e-02, PNorm = 177.0956, GNorm = 0.3673, lr_0 = 3.1383e-04
Loss = 8.2811e-03, PNorm = 177.1053, GNorm = 0.2404, lr_0 = 3.1362e-04
Loss = 1.3050e-02, PNorm = 177.1141, GNorm = 0.3221, lr_0 = 3.1340e-04
Loss = 9.3553e-03, PNorm = 177.1245, GNorm = 0.3299, lr_0 = 3.1319e-04
Loss = 9.0902e-03, PNorm = 177.1352, GNorm = 0.3663, lr_0 = 3.1297e-04
Loss = 1.3654e-02, PNorm = 177.1444, GNorm = 0.4122, lr_0 = 3.1276e-04
Loss = 1.3441e-02, PNorm = 177.1578, GNorm = 0.1684, lr_0 = 3.1254e-04
Loss = 1.3810e-02, PNorm = 177.1683, GNorm = 0.4172, lr_0 = 3.1233e-04
Loss = 9.1500e-03, PNorm = 177.1820, GNorm = 0.2128, lr_0 = 3.1212e-04
Loss = 8.0727e-03, PNorm = 177.1950, GNorm = 0.1417, lr_0 = 3.1190e-04
Loss = 8.7428e-03, PNorm = 177.2030, GNorm = 0.1371, lr_0 = 3.1169e-04
Loss = 1.2909e-02, PNorm = 177.2127, GNorm = 0.4712, lr_0 = 3.1147e-04
Loss = 1.0672e-02, PNorm = 177.2228, GNorm = 0.3835, lr_0 = 3.1126e-04
Loss = 1.3685e-02, PNorm = 177.2313, GNorm = 0.2525, lr_0 = 3.1105e-04
Loss = 1.1306e-02, PNorm = 177.2413, GNorm = 0.2628, lr_0 = 3.1083e-04
Loss = 1.2933e-02, PNorm = 177.2475, GNorm = 0.1171, lr_0 = 3.1062e-04
Loss = 7.6748e-03, PNorm = 177.2575, GNorm = 0.1526, lr_0 = 3.1041e-04
Loss = 8.9616e-03, PNorm = 177.2644, GNorm = 0.2845, lr_0 = 3.1020e-04
Loss = 9.3749e-03, PNorm = 177.2723, GNorm = 0.2903, lr_0 = 3.0998e-04
Loss = 1.0442e-02, PNorm = 177.2833, GNorm = 0.1867, lr_0 = 3.0977e-04
Loss = 9.4064e-03, PNorm = 177.2925, GNorm = 0.3742, lr_0 = 3.0956e-04
Loss = 1.1145e-02, PNorm = 177.3025, GNorm = 0.2229, lr_0 = 3.0935e-04
Loss = 1.0025e-02, PNorm = 177.3131, GNorm = 0.1467, lr_0 = 3.0914e-04
Loss = 1.9542e-02, PNorm = 177.3225, GNorm = 0.3198, lr_0 = 3.0892e-04
Loss = 9.3380e-03, PNorm = 177.3317, GNorm = 0.2271, lr_0 = 3.0871e-04
Loss = 1.0342e-02, PNorm = 177.3413, GNorm = 0.1911, lr_0 = 3.0850e-04
Loss = 8.5211e-03, PNorm = 177.3522, GNorm = 0.1496, lr_0 = 3.0829e-04
Loss = 8.1765e-03, PNorm = 177.3652, GNorm = 0.2015, lr_0 = 3.0808e-04
Loss = 9.2155e-03, PNorm = 177.3783, GNorm = 0.1945, lr_0 = 3.0787e-04
Loss = 9.4829e-03, PNorm = 177.3887, GNorm = 0.1007, lr_0 = 3.0766e-04
Loss = 1.3523e-02, PNorm = 177.4009, GNorm = 0.1846, lr_0 = 3.0745e-04
Loss = 1.1976e-02, PNorm = 177.4094, GNorm = 0.4501, lr_0 = 3.0723e-04
Loss = 1.2484e-02, PNorm = 177.4180, GNorm = 0.2113, lr_0 = 3.0702e-04
Loss = 1.0582e-02, PNorm = 177.4294, GNorm = 0.2336, lr_0 = 3.0681e-04
Loss = 7.6712e-03, PNorm = 177.4398, GNorm = 0.1282, lr_0 = 3.0660e-04
Loss = 8.7151e-03, PNorm = 177.4531, GNorm = 0.4628, lr_0 = 3.0639e-04
Loss = 1.5343e-02, PNorm = 177.4625, GNorm = 0.1324, lr_0 = 3.0618e-04
Loss = 1.1010e-02, PNorm = 177.4736, GNorm = 0.1251, lr_0 = 3.0597e-04
Loss = 7.9767e-03, PNorm = 177.4856, GNorm = 0.2130, lr_0 = 3.0576e-04
Loss = 1.4913e-02, PNorm = 177.4964, GNorm = 0.2265, lr_0 = 3.0555e-04
Loss = 1.5360e-02, PNorm = 177.5067, GNorm = 0.4592, lr_0 = 3.0535e-04
Loss = 7.6161e-03, PNorm = 177.5144, GNorm = 0.1108, lr_0 = 3.0514e-04
Loss = 1.1393e-02, PNorm = 177.5243, GNorm = 0.3086, lr_0 = 3.0493e-04
Loss = 9.5287e-03, PNorm = 177.5366, GNorm = 0.2335, lr_0 = 3.0472e-04
Loss = 1.0799e-02, PNorm = 177.5473, GNorm = 0.3710, lr_0 = 3.0451e-04
Loss = 1.2338e-02, PNorm = 177.5579, GNorm = 0.3267, lr_0 = 3.0430e-04
Loss = 8.2932e-03, PNorm = 177.5689, GNorm = 0.2586, lr_0 = 3.0409e-04
Loss = 8.4076e-03, PNorm = 177.5771, GNorm = 0.1492, lr_0 = 3.0388e-04
Loss = 8.5986e-03, PNorm = 177.5871, GNorm = 0.2602, lr_0 = 3.0368e-04
Loss = 9.7367e-03, PNorm = 177.5960, GNorm = 0.2371, lr_0 = 3.0347e-04
Loss = 9.2060e-03, PNorm = 177.6054, GNorm = 0.2476, lr_0 = 3.0326e-04
Loss = 9.0847e-03, PNorm = 177.6162, GNorm = 0.2501, lr_0 = 3.0305e-04
Loss = 1.0493e-02, PNorm = 177.6255, GNorm = 0.1498, lr_0 = 3.0284e-04
Loss = 9.9246e-03, PNorm = 177.6358, GNorm = 0.1273, lr_0 = 3.0264e-04
Loss = 8.2922e-03, PNorm = 177.6475, GNorm = 0.1707, lr_0 = 3.0243e-04
Loss = 1.0767e-02, PNorm = 177.6579, GNorm = 0.3221, lr_0 = 3.0222e-04
Loss = 8.6199e-03, PNorm = 177.6713, GNorm = 0.2201, lr_0 = 3.0202e-04
Loss = 1.0257e-02, PNorm = 177.6825, GNorm = 0.3692, lr_0 = 3.0181e-04
Loss = 8.1674e-03, PNorm = 177.6954, GNorm = 0.3304, lr_0 = 3.0160e-04
Loss = 8.9236e-03, PNorm = 177.7077, GNorm = 0.2349, lr_0 = 3.0140e-04
Loss = 9.8118e-03, PNorm = 177.7167, GNorm = 0.6263, lr_0 = 3.0119e-04
Loss = 1.2936e-02, PNorm = 177.7285, GNorm = 0.1850, lr_0 = 3.0098e-04
Loss = 1.1679e-02, PNorm = 177.7392, GNorm = 0.1910, lr_0 = 3.0078e-04
Loss = 1.2278e-02, PNorm = 177.7482, GNorm = 0.3691, lr_0 = 3.0057e-04
Loss = 1.4937e-02, PNorm = 177.7584, GNorm = 0.8483, lr_0 = 3.0036e-04
Loss = 1.0190e-02, PNorm = 177.7697, GNorm = 0.1704, lr_0 = 3.0016e-04
Loss = 1.2778e-02, PNorm = 177.7806, GNorm = 0.2185, lr_0 = 2.9995e-04
Loss = 1.6576e-02, PNorm = 177.7923, GNorm = 0.3159, lr_0 = 2.9975e-04
Loss = 8.9272e-03, PNorm = 177.8043, GNorm = 0.1787, lr_0 = 2.9954e-04
Loss = 7.7952e-03, PNorm = 177.8143, GNorm = 0.1715, lr_0 = 2.9934e-04
Loss = 8.8941e-03, PNorm = 177.8242, GNorm = 0.2870, lr_0 = 2.9913e-04
Loss = 8.7526e-03, PNorm = 177.8328, GNorm = 0.2021, lr_0 = 2.9893e-04
Loss = 1.0551e-02, PNorm = 177.8462, GNorm = 0.2931, lr_0 = 2.9872e-04
Loss = 1.1027e-02, PNorm = 177.8589, GNorm = 0.2192, lr_0 = 2.9852e-04
Loss = 1.2971e-02, PNorm = 177.8723, GNorm = 0.4375, lr_0 = 2.9831e-04
Loss = 1.4069e-02, PNorm = 177.8851, GNorm = 0.1794, lr_0 = 2.9811e-04
Loss = 8.1368e-03, PNorm = 177.8977, GNorm = 0.2026, lr_0 = 2.9790e-04
Loss = 1.0174e-02, PNorm = 177.9122, GNorm = 0.2940, lr_0 = 2.9770e-04
Loss = 7.5287e-03, PNorm = 177.9257, GNorm = 0.2159, lr_0 = 2.9750e-04
Loss = 1.0427e-02, PNorm = 177.9385, GNorm = 0.2157, lr_0 = 2.9729e-04
Loss = 8.2976e-03, PNorm = 177.9508, GNorm = 0.5074, lr_0 = 2.9709e-04
Loss = 7.7890e-03, PNorm = 177.9623, GNorm = 0.2333, lr_0 = 2.9689e-04
Loss = 7.3053e-03, PNorm = 177.9729, GNorm = 0.1547, lr_0 = 2.9668e-04
Loss = 8.3057e-03, PNorm = 177.9825, GNorm = 0.1919, lr_0 = 2.9648e-04
Loss = 1.0747e-02, PNorm = 177.9936, GNorm = 0.2363, lr_0 = 2.9628e-04
Loss = 1.4876e-02, PNorm = 178.0031, GNorm = 0.1412, lr_0 = 2.9607e-04
Loss = 1.1309e-02, PNorm = 178.0133, GNorm = 0.3416, lr_0 = 2.9587e-04
Loss = 1.1930e-02, PNorm = 178.0228, GNorm = 0.2846, lr_0 = 2.9567e-04
Loss = 8.3526e-03, PNorm = 178.0336, GNorm = 0.1150, lr_0 = 2.9546e-04
Loss = 8.5815e-03, PNorm = 178.0444, GNorm = 0.1548, lr_0 = 2.9526e-04
Loss = 7.3833e-03, PNorm = 178.0540, GNorm = 0.2007, lr_0 = 2.9506e-04
Loss = 1.3200e-02, PNorm = 178.0652, GNorm = 0.1809, lr_0 = 2.9486e-04
Loss = 1.0859e-02, PNorm = 178.0753, GNorm = 0.2589, lr_0 = 2.9466e-04
Loss = 7.0012e-03, PNorm = 178.0884, GNorm = 0.1006, lr_0 = 2.9445e-04
Loss = 9.9362e-03, PNorm = 178.0987, GNorm = 0.1597, lr_0 = 2.9425e-04
Loss = 8.5942e-03, PNorm = 178.1101, GNorm = 0.1910, lr_0 = 2.9405e-04
Loss = 6.9551e-03, PNorm = 178.1219, GNorm = 0.1393, lr_0 = 2.9385e-04
Loss = 1.0293e-02, PNorm = 178.1318, GNorm = 0.1213, lr_0 = 2.9365e-04
Loss = 8.3287e-03, PNorm = 178.1455, GNorm = 0.2333, lr_0 = 2.9345e-04
Loss = 1.0671e-02, PNorm = 178.1580, GNorm = 0.4929, lr_0 = 2.9325e-04
Loss = 9.3535e-03, PNorm = 178.1688, GNorm = 0.4523, lr_0 = 2.9305e-04
Loss = 2.4826e-02, PNorm = 178.1834, GNorm = 0.1700, lr_0 = 2.9284e-04
Loss = 6.7306e-03, PNorm = 178.1935, GNorm = 0.1552, lr_0 = 2.9264e-04
Loss = 1.2608e-02, PNorm = 178.2060, GNorm = 0.1776, lr_0 = 2.9244e-04
Loss = 1.3912e-02, PNorm = 178.2176, GNorm = 0.3503, lr_0 = 2.9224e-04
Loss = 9.3101e-03, PNorm = 178.2293, GNorm = 0.1885, lr_0 = 2.9204e-04
Loss = 1.2070e-02, PNorm = 178.2425, GNorm = 0.3697, lr_0 = 2.9184e-04
Loss = 7.5717e-03, PNorm = 178.2513, GNorm = 0.2505, lr_0 = 2.9164e-04
Loss = 9.2852e-03, PNorm = 178.2638, GNorm = 0.2264, lr_0 = 2.9144e-04
Loss = 7.5552e-03, PNorm = 178.2749, GNorm = 0.2006, lr_0 = 2.9124e-04
Validation mae = 0.121320
Epoch 17
Loss = 8.5153e-03, PNorm = 178.2852, GNorm = 0.1342, lr_0 = 2.9104e-04
Loss = 6.0470e-03, PNorm = 178.2937, GNorm = 0.1424, lr_0 = 2.9084e-04
Loss = 8.0379e-03, PNorm = 178.2995, GNorm = 0.1092, lr_0 = 2.9065e-04
Loss = 8.1989e-03, PNorm = 178.3075, GNorm = 0.1532, lr_0 = 2.9045e-04
Loss = 9.2099e-03, PNorm = 178.3191, GNorm = 0.1505, lr_0 = 2.9025e-04
Loss = 8.4994e-03, PNorm = 178.3272, GNorm = 0.2586, lr_0 = 2.9005e-04
Loss = 8.3154e-03, PNorm = 178.3357, GNorm = 0.3055, lr_0 = 2.8985e-04
Loss = 7.1476e-03, PNorm = 178.3452, GNorm = 0.1912, lr_0 = 2.8965e-04
Loss = 7.4065e-03, PNorm = 178.3553, GNorm = 0.2484, lr_0 = 2.8945e-04
Loss = 8.2436e-03, PNorm = 178.3606, GNorm = 0.2356, lr_0 = 2.8925e-04
Loss = 6.4120e-03, PNorm = 178.3678, GNorm = 0.1210, lr_0 = 2.8906e-04
Loss = 7.4927e-03, PNorm = 178.3753, GNorm = 0.1538, lr_0 = 2.8886e-04
Loss = 5.9000e-03, PNorm = 178.3844, GNorm = 0.1005, lr_0 = 2.8866e-04
Loss = 7.2177e-03, PNorm = 178.3925, GNorm = 0.2556, lr_0 = 2.8846e-04
Loss = 7.2989e-03, PNorm = 178.3993, GNorm = 0.1697, lr_0 = 2.8826e-04
Loss = 8.0431e-03, PNorm = 178.4069, GNorm = 0.1339, lr_0 = 2.8807e-04
Loss = 7.7277e-03, PNorm = 178.4139, GNorm = 0.1228, lr_0 = 2.8787e-04
Loss = 1.0794e-02, PNorm = 178.4194, GNorm = 0.1594, lr_0 = 2.8767e-04
Loss = 8.0338e-03, PNorm = 178.4281, GNorm = 0.4009, lr_0 = 2.8748e-04
Loss = 8.6798e-03, PNorm = 178.4342, GNorm = 0.3558, lr_0 = 2.8728e-04
Loss = 9.2047e-03, PNorm = 178.4441, GNorm = 0.6194, lr_0 = 2.8708e-04
Loss = 8.3769e-03, PNorm = 178.4541, GNorm = 0.2589, lr_0 = 2.8689e-04
Loss = 1.1265e-02, PNorm = 178.4640, GNorm = 0.1830, lr_0 = 2.8669e-04
Loss = 7.1031e-03, PNorm = 178.4722, GNorm = 0.3306, lr_0 = 2.8649e-04
Loss = 9.4651e-03, PNorm = 178.4811, GNorm = 0.1612, lr_0 = 2.8630e-04
Loss = 8.9563e-03, PNorm = 178.4901, GNorm = 0.2009, lr_0 = 2.8610e-04
Loss = 7.1913e-03, PNorm = 178.4970, GNorm = 0.2845, lr_0 = 2.8590e-04
Loss = 1.1798e-02, PNorm = 178.5056, GNorm = 0.2493, lr_0 = 2.8571e-04
Loss = 1.0197e-02, PNorm = 178.5155, GNorm = 0.1396, lr_0 = 2.8551e-04
Loss = 1.3308e-02, PNorm = 178.5225, GNorm = 0.4159, lr_0 = 2.8532e-04
Loss = 7.7565e-03, PNorm = 178.5309, GNorm = 0.4337, lr_0 = 2.8512e-04
Loss = 6.9203e-03, PNorm = 178.5382, GNorm = 0.1353, lr_0 = 2.8493e-04
Loss = 8.7724e-03, PNorm = 178.5447, GNorm = 0.3104, lr_0 = 2.8473e-04
Loss = 6.8909e-03, PNorm = 178.5512, GNorm = 0.1652, lr_0 = 2.8454e-04
Loss = 1.3189e-02, PNorm = 178.5582, GNorm = 0.2508, lr_0 = 2.8434e-04
Loss = 1.0169e-02, PNorm = 178.5662, GNorm = 0.2676, lr_0 = 2.8415e-04
Loss = 8.1939e-03, PNorm = 178.5755, GNorm = 0.1238, lr_0 = 2.8395e-04
Loss = 6.8216e-03, PNorm = 178.5855, GNorm = 0.1829, lr_0 = 2.8376e-04
Loss = 7.5393e-03, PNorm = 178.5917, GNorm = 0.1794, lr_0 = 2.8356e-04
Loss = 1.2529e-02, PNorm = 178.5979, GNorm = 1.0098, lr_0 = 2.8337e-04
Loss = 8.5949e-03, PNorm = 178.6087, GNorm = 0.2807, lr_0 = 2.8317e-04
Loss = 6.7502e-03, PNorm = 178.6177, GNorm = 0.3008, lr_0 = 2.8298e-04
Loss = 8.7750e-03, PNorm = 178.6236, GNorm = 0.0962, lr_0 = 2.8279e-04
Loss = 5.9675e-03, PNorm = 178.6329, GNorm = 0.2467, lr_0 = 2.8259e-04
Loss = 1.1986e-02, PNorm = 178.6432, GNorm = 0.2650, lr_0 = 2.8240e-04
Loss = 1.0026e-02, PNorm = 178.6547, GNorm = 0.1591, lr_0 = 2.8221e-04
Loss = 8.9536e-03, PNorm = 178.6642, GNorm = 0.1634, lr_0 = 2.8201e-04
Loss = 1.2307e-02, PNorm = 178.6765, GNorm = 0.1709, lr_0 = 2.8182e-04
Loss = 9.7596e-03, PNorm = 178.6855, GNorm = 0.2225, lr_0 = 2.8163e-04
Loss = 8.0432e-03, PNorm = 178.6936, GNorm = 0.1499, lr_0 = 2.8143e-04
Loss = 8.8658e-03, PNorm = 178.7007, GNorm = 0.1956, lr_0 = 2.8124e-04
Loss = 1.4126e-02, PNorm = 178.7074, GNorm = 0.5551, lr_0 = 2.8105e-04
Loss = 7.6504e-03, PNorm = 178.7162, GNorm = 0.2164, lr_0 = 2.8085e-04
Loss = 7.0050e-03, PNorm = 178.7275, GNorm = 0.1319, lr_0 = 2.8066e-04
Loss = 1.2105e-02, PNorm = 178.7389, GNorm = 0.0981, lr_0 = 2.8047e-04
Loss = 1.0253e-02, PNorm = 178.7480, GNorm = 0.2115, lr_0 = 2.8028e-04
Loss = 1.0256e-02, PNorm = 178.7571, GNorm = 0.3735, lr_0 = 2.8009e-04
Loss = 1.3611e-02, PNorm = 178.7621, GNorm = 0.1224, lr_0 = 2.7989e-04
Loss = 7.9325e-03, PNorm = 178.7704, GNorm = 0.1459, lr_0 = 2.7970e-04
Loss = 7.4547e-03, PNorm = 178.7807, GNorm = 0.1367, lr_0 = 2.7951e-04
Loss = 1.1128e-02, PNorm = 178.7880, GNorm = 0.2821, lr_0 = 2.7932e-04
Loss = 1.4288e-02, PNorm = 178.8008, GNorm = 0.1906, lr_0 = 2.7913e-04
Loss = 7.7216e-03, PNorm = 178.8129, GNorm = 0.3346, lr_0 = 2.7894e-04
Loss = 6.6772e-03, PNorm = 178.8229, GNorm = 0.1834, lr_0 = 2.7875e-04
Loss = 1.2023e-02, PNorm = 178.8316, GNorm = 0.2421, lr_0 = 2.7855e-04
Loss = 6.4964e-03, PNorm = 178.8389, GNorm = 0.2171, lr_0 = 2.7836e-04
Loss = 9.8339e-03, PNorm = 178.8455, GNorm = 0.1420, lr_0 = 2.7817e-04
Loss = 6.9221e-03, PNorm = 178.8536, GNorm = 0.1279, lr_0 = 2.7798e-04
Loss = 7.7343e-03, PNorm = 178.8625, GNorm = 0.1646, lr_0 = 2.7779e-04
Loss = 1.5019e-02, PNorm = 178.8713, GNorm = 0.2466, lr_0 = 2.7760e-04
Loss = 1.3068e-02, PNorm = 178.8830, GNorm = 0.2471, lr_0 = 2.7741e-04
Loss = 9.4437e-03, PNorm = 178.8919, GNorm = 0.2677, lr_0 = 2.7722e-04
Loss = 6.2353e-03, PNorm = 178.9001, GNorm = 0.2375, lr_0 = 2.7703e-04
Loss = 8.5651e-03, PNorm = 178.9085, GNorm = 0.1981, lr_0 = 2.7684e-04
Loss = 8.1778e-03, PNorm = 178.9186, GNorm = 0.1175, lr_0 = 2.7665e-04
Loss = 7.8740e-03, PNorm = 178.9292, GNorm = 0.1410, lr_0 = 2.7646e-04
Loss = 7.4262e-03, PNorm = 178.9390, GNorm = 0.1541, lr_0 = 2.7627e-04
Loss = 1.0581e-02, PNorm = 178.9467, GNorm = 0.2173, lr_0 = 2.7608e-04
Loss = 1.5676e-02, PNorm = 178.9541, GNorm = 0.4052, lr_0 = 2.7590e-04
Loss = 8.3814e-03, PNorm = 178.9668, GNorm = 0.1945, lr_0 = 2.7571e-04
Loss = 6.7446e-03, PNorm = 178.9753, GNorm = 0.2740, lr_0 = 2.7552e-04
Loss = 5.2550e-03, PNorm = 178.9828, GNorm = 0.1689, lr_0 = 2.7533e-04
Loss = 7.5384e-03, PNorm = 178.9907, GNorm = 0.2291, lr_0 = 2.7514e-04
Loss = 7.8998e-03, PNorm = 178.9983, GNorm = 0.1496, lr_0 = 2.7495e-04
Loss = 6.8979e-03, PNorm = 179.0079, GNorm = 0.2064, lr_0 = 2.7476e-04
Loss = 7.3685e-03, PNorm = 179.0174, GNorm = 0.1485, lr_0 = 2.7457e-04
Loss = 8.6964e-03, PNorm = 179.0278, GNorm = 0.2871, lr_0 = 2.7439e-04
Loss = 1.2367e-02, PNorm = 179.0370, GNorm = 0.3688, lr_0 = 2.7420e-04
Loss = 7.8951e-03, PNorm = 179.0464, GNorm = 0.1265, lr_0 = 2.7401e-04
Loss = 1.3436e-02, PNorm = 179.0543, GNorm = 0.3024, lr_0 = 2.7382e-04
Loss = 1.1641e-02, PNorm = 179.0650, GNorm = 0.5617, lr_0 = 2.7364e-04
Loss = 8.3599e-03, PNorm = 179.0755, GNorm = 0.1648, lr_0 = 2.7345e-04
Loss = 1.2311e-02, PNorm = 179.0869, GNorm = 0.1506, lr_0 = 2.7326e-04
Loss = 9.8600e-03, PNorm = 179.0969, GNorm = 0.5035, lr_0 = 2.7307e-04
Loss = 1.2207e-02, PNorm = 179.1077, GNorm = 0.2292, lr_0 = 2.7289e-04
Loss = 6.9107e-03, PNorm = 179.1171, GNorm = 0.2217, lr_0 = 2.7270e-04
Loss = 9.7677e-03, PNorm = 179.1274, GNorm = 0.1922, lr_0 = 2.7251e-04
Loss = 9.0040e-03, PNorm = 179.1379, GNorm = 0.3889, lr_0 = 2.7233e-04
Loss = 7.1720e-03, PNorm = 179.1461, GNorm = 0.3352, lr_0 = 2.7214e-04
Loss = 9.2854e-03, PNorm = 179.1542, GNorm = 0.3732, lr_0 = 2.7195e-04
Loss = 1.1094e-02, PNorm = 179.1656, GNorm = 0.6974, lr_0 = 2.7177e-04
Loss = 1.1229e-02, PNorm = 179.1740, GNorm = 0.6231, lr_0 = 2.7158e-04
Loss = 7.0370e-03, PNorm = 179.1836, GNorm = 0.3744, lr_0 = 2.7139e-04
Loss = 1.0257e-02, PNorm = 179.1955, GNorm = 0.3312, lr_0 = 2.7121e-04
Loss = 9.2498e-03, PNorm = 179.2053, GNorm = 0.1374, lr_0 = 2.7102e-04
Loss = 1.3449e-02, PNorm = 179.2155, GNorm = 0.1226, lr_0 = 2.7084e-04
Loss = 1.2536e-02, PNorm = 179.2238, GNorm = 0.1995, lr_0 = 2.7065e-04
Loss = 8.7741e-03, PNorm = 179.2311, GNorm = 0.2441, lr_0 = 2.7047e-04
Loss = 6.8585e-03, PNorm = 179.2390, GNorm = 0.2034, lr_0 = 2.7028e-04
Loss = 9.1424e-03, PNorm = 179.2492, GNorm = 0.1310, lr_0 = 2.7010e-04
Loss = 7.4896e-03, PNorm = 179.2586, GNorm = 0.1477, lr_0 = 2.6991e-04
Loss = 2.6805e-02, PNorm = 179.2660, GNorm = 2.8171, lr_0 = 2.6973e-04
Loss = 1.1434e-02, PNorm = 179.2749, GNorm = 0.1577, lr_0 = 2.6954e-04
Loss = 7.7826e-03, PNorm = 179.2817, GNorm = 0.2557, lr_0 = 2.6936e-04
Loss = 6.7208e-03, PNorm = 179.2922, GNorm = 0.3287, lr_0 = 2.6917e-04
Loss = 1.1720e-02, PNorm = 179.3032, GNorm = 0.5992, lr_0 = 2.6899e-04
Loss = 8.8466e-03, PNorm = 179.3146, GNorm = 0.7384, lr_0 = 2.6880e-04
Loss = 6.9636e-03, PNorm = 179.3223, GNorm = 0.1038, lr_0 = 2.6862e-04
Loss = 7.2749e-03, PNorm = 179.3323, GNorm = 0.4633, lr_0 = 2.6844e-04
Loss = 9.6148e-03, PNorm = 179.3419, GNorm = 0.1072, lr_0 = 2.6825e-04
Validation mae = 0.121173
Epoch 18
Loss = 6.7821e-03, PNorm = 179.3502, GNorm = 0.1166, lr_0 = 2.6807e-04
Loss = 6.8417e-03, PNorm = 179.3570, GNorm = 0.1877, lr_0 = 2.6788e-04
Loss = 9.9018e-03, PNorm = 179.3641, GNorm = 0.1512, lr_0 = 2.6770e-04
Loss = 6.9126e-03, PNorm = 179.3701, GNorm = 0.1162, lr_0 = 2.6752e-04
Loss = 7.4646e-03, PNorm = 179.3761, GNorm = 0.1358, lr_0 = 2.6733e-04
Loss = 7.5348e-03, PNorm = 179.3836, GNorm = 0.1586, lr_0 = 2.6715e-04
Loss = 8.1773e-03, PNorm = 179.3912, GNorm = 0.2135, lr_0 = 2.6697e-04
Loss = 5.5505e-03, PNorm = 179.3999, GNorm = 0.1815, lr_0 = 2.6678e-04
Loss = 7.0029e-03, PNorm = 179.4065, GNorm = 0.1228, lr_0 = 2.6660e-04
Loss = 6.4979e-03, PNorm = 179.4144, GNorm = 0.2596, lr_0 = 2.6642e-04
Loss = 1.0729e-02, PNorm = 179.4227, GNorm = 0.1587, lr_0 = 2.6624e-04
Loss = 8.4828e-03, PNorm = 179.4292, GNorm = 0.3622, lr_0 = 2.6605e-04
Loss = 6.4770e-03, PNorm = 179.4349, GNorm = 0.2051, lr_0 = 2.6587e-04
Loss = 8.2625e-03, PNorm = 179.4389, GNorm = 0.1261, lr_0 = 2.6569e-04
Loss = 1.0602e-02, PNorm = 179.4447, GNorm = 0.5614, lr_0 = 2.6551e-04
Loss = 8.8174e-03, PNorm = 179.4519, GNorm = 0.1348, lr_0 = 2.6533e-04
Loss = 1.3563e-02, PNorm = 179.4563, GNorm = 0.2766, lr_0 = 2.6514e-04
Loss = 1.0662e-02, PNorm = 179.4632, GNorm = 0.4699, lr_0 = 2.6496e-04
Loss = 6.0536e-03, PNorm = 179.4717, GNorm = 0.1447, lr_0 = 2.6478e-04
Loss = 6.9866e-03, PNorm = 179.4799, GNorm = 0.1711, lr_0 = 2.6460e-04
Loss = 7.2101e-03, PNorm = 179.4899, GNorm = 0.2009, lr_0 = 2.6442e-04
Loss = 7.5910e-03, PNorm = 179.4976, GNorm = 0.1572, lr_0 = 2.6424e-04
Loss = 8.6626e-03, PNorm = 179.5050, GNorm = 0.1875, lr_0 = 2.6406e-04
Loss = 6.9245e-03, PNorm = 179.5126, GNorm = 0.1307, lr_0 = 2.6388e-04
Loss = 7.1581e-03, PNorm = 179.5213, GNorm = 0.2031, lr_0 = 2.6369e-04
Loss = 6.9715e-03, PNorm = 179.5304, GNorm = 0.2337, lr_0 = 2.6351e-04
Loss = 7.2595e-03, PNorm = 179.5380, GNorm = 0.2030, lr_0 = 2.6333e-04
Loss = 6.0274e-03, PNorm = 179.5462, GNorm = 0.2425, lr_0 = 2.6315e-04
Loss = 8.4978e-03, PNorm = 179.5549, GNorm = 0.5017, lr_0 = 2.6297e-04
Loss = 1.3275e-02, PNorm = 179.5630, GNorm = 0.5410, lr_0 = 2.6279e-04
Loss = 9.3171e-03, PNorm = 179.5712, GNorm = 0.2772, lr_0 = 2.6261e-04
Loss = 9.7060e-03, PNorm = 179.5796, GNorm = 0.1767, lr_0 = 2.6243e-04
Loss = 6.8484e-03, PNorm = 179.5870, GNorm = 0.1361, lr_0 = 2.6225e-04
Loss = 9.0490e-03, PNorm = 179.5964, GNorm = 0.5751, lr_0 = 2.6207e-04
Loss = 8.5714e-03, PNorm = 179.6027, GNorm = 0.4069, lr_0 = 2.6189e-04
Loss = 5.7594e-03, PNorm = 179.6108, GNorm = 0.1393, lr_0 = 2.6171e-04
Loss = 5.8160e-03, PNorm = 179.6196, GNorm = 0.0939, lr_0 = 2.6153e-04
Loss = 7.4747e-03, PNorm = 179.6281, GNorm = 0.1950, lr_0 = 2.6136e-04
Loss = 6.5101e-03, PNorm = 179.6351, GNorm = 0.4330, lr_0 = 2.6118e-04
Loss = 7.2118e-03, PNorm = 179.6425, GNorm = 0.1355, lr_0 = 2.6100e-04
Loss = 8.3625e-03, PNorm = 179.6509, GNorm = 0.1152, lr_0 = 2.6082e-04
Loss = 8.4170e-03, PNorm = 179.6595, GNorm = 0.3037, lr_0 = 2.6064e-04
Loss = 6.6078e-03, PNorm = 179.6639, GNorm = 0.1809, lr_0 = 2.6046e-04
Loss = 6.6860e-03, PNorm = 179.6718, GNorm = 0.3262, lr_0 = 2.6028e-04
Loss = 8.9083e-03, PNorm = 179.6788, GNorm = 0.2333, lr_0 = 2.6011e-04
Loss = 8.4854e-03, PNorm = 179.6866, GNorm = 0.2583, lr_0 = 2.5993e-04
Loss = 1.1086e-02, PNorm = 179.6934, GNorm = 0.2079, lr_0 = 2.5975e-04
Loss = 7.8422e-03, PNorm = 179.6993, GNorm = 0.2582, lr_0 = 2.5957e-04
Loss = 8.9903e-03, PNorm = 179.7070, GNorm = 0.2909, lr_0 = 2.5939e-04
Loss = 5.9344e-03, PNorm = 179.7160, GNorm = 0.1600, lr_0 = 2.5922e-04
Loss = 6.5503e-03, PNorm = 179.7232, GNorm = 0.1763, lr_0 = 2.5904e-04
Loss = 7.3080e-03, PNorm = 179.7301, GNorm = 0.1951, lr_0 = 2.5886e-04
Loss = 5.2851e-03, PNorm = 179.7383, GNorm = 0.2151, lr_0 = 2.5868e-04
Loss = 6.9604e-03, PNorm = 179.7464, GNorm = 0.1114, lr_0 = 2.5851e-04
Loss = 1.1704e-02, PNorm = 179.7556, GNorm = 0.0990, lr_0 = 2.5833e-04
Loss = 7.3086e-03, PNorm = 179.7620, GNorm = 0.2222, lr_0 = 2.5815e-04
Loss = 9.8617e-03, PNorm = 179.7690, GNorm = 0.1782, lr_0 = 2.5797e-04
Loss = 8.3313e-03, PNorm = 179.7761, GNorm = 0.1882, lr_0 = 2.5780e-04
Loss = 2.4034e-02, PNorm = 179.7839, GNorm = 0.3771, lr_0 = 2.5762e-04
Loss = 8.2757e-03, PNorm = 179.7887, GNorm = 0.1740, lr_0 = 2.5745e-04
Loss = 1.2080e-02, PNorm = 179.7954, GNorm = 0.3851, lr_0 = 2.5727e-04
Loss = 5.1465e-03, PNorm = 179.8021, GNorm = 0.2220, lr_0 = 2.5709e-04
Loss = 8.3873e-03, PNorm = 179.8086, GNorm = 0.2419, lr_0 = 2.5692e-04
Loss = 6.1493e-03, PNorm = 179.8153, GNorm = 0.1395, lr_0 = 2.5674e-04
Loss = 1.0659e-02, PNorm = 179.8236, GNorm = 0.2104, lr_0 = 2.5656e-04
Loss = 1.0814e-02, PNorm = 179.8325, GNorm = 0.3910, lr_0 = 2.5639e-04
Loss = 1.1810e-02, PNorm = 179.8397, GNorm = 0.1627, lr_0 = 2.5621e-04
Loss = 5.5253e-03, PNorm = 179.8461, GNorm = 0.1498, lr_0 = 2.5604e-04
Loss = 1.0785e-02, PNorm = 179.8532, GNorm = 0.0996, lr_0 = 2.5586e-04
Loss = 6.8792e-03, PNorm = 179.8601, GNorm = 0.1443, lr_0 = 2.5569e-04
Loss = 6.0463e-03, PNorm = 179.8674, GNorm = 0.2146, lr_0 = 2.5551e-04
Loss = 1.7494e-02, PNorm = 179.8729, GNorm = 0.4196, lr_0 = 2.5534e-04
Loss = 8.8330e-03, PNorm = 179.8788, GNorm = 0.2991, lr_0 = 2.5516e-04
Loss = 5.8684e-03, PNorm = 179.8875, GNorm = 0.0954, lr_0 = 2.5499e-04
Loss = 5.5656e-03, PNorm = 179.8937, GNorm = 0.1857, lr_0 = 2.5481e-04
Loss = 7.1281e-03, PNorm = 179.8998, GNorm = 0.2641, lr_0 = 2.5464e-04
Loss = 6.1364e-03, PNorm = 179.9074, GNorm = 0.1162, lr_0 = 2.5446e-04
Loss = 7.5347e-03, PNorm = 179.9180, GNorm = 0.1319, lr_0 = 2.5429e-04
Loss = 1.3258e-02, PNorm = 179.9241, GNorm = 0.1263, lr_0 = 2.5411e-04
Loss = 7.7481e-03, PNorm = 179.9312, GNorm = 0.1928, lr_0 = 2.5394e-04
Loss = 7.2688e-03, PNorm = 179.9397, GNorm = 0.1506, lr_0 = 2.5377e-04
Loss = 8.8984e-03, PNorm = 179.9486, GNorm = 0.1412, lr_0 = 2.5359e-04
Loss = 8.5711e-03, PNorm = 179.9570, GNorm = 0.2249, lr_0 = 2.5342e-04
Loss = 8.3227e-03, PNorm = 179.9654, GNorm = 0.1618, lr_0 = 2.5325e-04
Loss = 5.7589e-03, PNorm = 179.9746, GNorm = 0.0851, lr_0 = 2.5307e-04
Loss = 7.4228e-03, PNorm = 179.9855, GNorm = 0.2585, lr_0 = 2.5290e-04
Loss = 8.5071e-03, PNorm = 179.9962, GNorm = 0.2635, lr_0 = 2.5273e-04
Loss = 1.1188e-02, PNorm = 180.0095, GNorm = 0.3042, lr_0 = 2.5255e-04
Loss = 5.8427e-03, PNorm = 180.0194, GNorm = 0.2716, lr_0 = 2.5238e-04
Loss = 6.1958e-03, PNorm = 180.0263, GNorm = 0.1446, lr_0 = 2.5221e-04
Loss = 7.1692e-03, PNorm = 180.0320, GNorm = 0.1502, lr_0 = 2.5203e-04
Loss = 7.0881e-03, PNorm = 180.0387, GNorm = 0.3526, lr_0 = 2.5186e-04
Loss = 7.4726e-03, PNorm = 180.0444, GNorm = 0.1977, lr_0 = 2.5169e-04
Loss = 1.0874e-02, PNorm = 180.0549, GNorm = 0.3466, lr_0 = 2.5152e-04
Loss = 5.1516e-03, PNorm = 180.0666, GNorm = 0.1843, lr_0 = 2.5134e-04
Loss = 6.4346e-03, PNorm = 180.0755, GNorm = 0.1069, lr_0 = 2.5117e-04
Loss = 7.3441e-03, PNorm = 180.0829, GNorm = 0.1804, lr_0 = 2.5100e-04
Loss = 8.5307e-03, PNorm = 180.0898, GNorm = 0.1238, lr_0 = 2.5083e-04
Loss = 7.2367e-03, PNorm = 180.0980, GNorm = 0.2885, lr_0 = 2.5066e-04
Loss = 5.5193e-03, PNorm = 180.1069, GNorm = 0.1440, lr_0 = 2.5048e-04
Loss = 7.3904e-03, PNorm = 180.1164, GNorm = 0.2960, lr_0 = 2.5031e-04
Loss = 6.4278e-03, PNorm = 180.1232, GNorm = 0.1812, lr_0 = 2.5014e-04
Loss = 1.1601e-02, PNorm = 180.1306, GNorm = 0.2246, lr_0 = 2.4997e-04
Loss = 8.4393e-03, PNorm = 180.1392, GNorm = 0.2129, lr_0 = 2.4980e-04
Loss = 1.0177e-02, PNorm = 180.1459, GNorm = 0.1030, lr_0 = 2.4963e-04
Loss = 1.0559e-02, PNorm = 180.1530, GNorm = 0.4014, lr_0 = 2.4946e-04
Loss = 5.0953e-03, PNorm = 180.1611, GNorm = 0.1482, lr_0 = 2.4929e-04
Loss = 1.2944e-02, PNorm = 180.1689, GNorm = 0.5020, lr_0 = 2.4911e-04
Loss = 6.8769e-03, PNorm = 180.1772, GNorm = 0.3896, lr_0 = 2.4894e-04
Loss = 6.9152e-03, PNorm = 180.1830, GNorm = 0.1816, lr_0 = 2.4877e-04
Loss = 6.0975e-03, PNorm = 180.1922, GNorm = 0.1411, lr_0 = 2.4860e-04
Loss = 8.4435e-03, PNorm = 180.1977, GNorm = 0.2244, lr_0 = 2.4843e-04
Loss = 1.1378e-02, PNorm = 180.2041, GNorm = 0.2293, lr_0 = 2.4826e-04
Loss = 7.8787e-03, PNorm = 180.2113, GNorm = 0.2118, lr_0 = 2.4809e-04
Loss = 5.8802e-03, PNorm = 180.2186, GNorm = 0.1670, lr_0 = 2.4792e-04
Loss = 6.3176e-03, PNorm = 180.2267, GNorm = 0.2724, lr_0 = 2.4775e-04
Loss = 1.0086e-02, PNorm = 180.2326, GNorm = 0.1976, lr_0 = 2.4758e-04
Loss = 7.3294e-03, PNorm = 180.2403, GNorm = 0.2117, lr_0 = 2.4741e-04
Loss = 5.4742e-03, PNorm = 180.2477, GNorm = 0.1047, lr_0 = 2.4724e-04
Loss = 5.0760e-03, PNorm = 180.2545, GNorm = 0.1501, lr_0 = 2.4707e-04
Validation mae = 0.121064
Epoch 19
Loss = 9.6545e-03, PNorm = 180.2589, GNorm = 0.2234, lr_0 = 2.4690e-04
Loss = 6.4596e-03, PNorm = 180.2643, GNorm = 0.2026, lr_0 = 2.4674e-04
Loss = 5.9497e-03, PNorm = 180.2688, GNorm = 0.1838, lr_0 = 2.4657e-04
Loss = 6.9523e-03, PNorm = 180.2721, GNorm = 0.2590, lr_0 = 2.4640e-04
Loss = 5.4391e-03, PNorm = 180.2789, GNorm = 0.2127, lr_0 = 2.4623e-04
Loss = 5.3383e-03, PNorm = 180.2845, GNorm = 0.1207, lr_0 = 2.4606e-04
Loss = 5.9908e-03, PNorm = 180.2892, GNorm = 0.1414, lr_0 = 2.4589e-04
Loss = 4.2869e-03, PNorm = 180.2941, GNorm = 0.1315, lr_0 = 2.4572e-04
Loss = 5.2169e-03, PNorm = 180.2981, GNorm = 0.0893, lr_0 = 2.4556e-04
Loss = 5.7073e-03, PNorm = 180.3049, GNorm = 0.1239, lr_0 = 2.4539e-04
Loss = 9.4892e-03, PNorm = 180.3113, GNorm = 0.4588, lr_0 = 2.4522e-04
Loss = 5.8758e-03, PNorm = 180.3200, GNorm = 0.2024, lr_0 = 2.4505e-04
Loss = 9.9733e-03, PNorm = 180.3247, GNorm = 0.2311, lr_0 = 2.4488e-04
Loss = 5.6510e-03, PNorm = 180.3302, GNorm = 0.2139, lr_0 = 2.4472e-04
Loss = 4.7944e-03, PNorm = 180.3340, GNorm = 0.2238, lr_0 = 2.4455e-04
Loss = 5.5104e-03, PNorm = 180.3396, GNorm = 0.2217, lr_0 = 2.4438e-04
Loss = 5.7241e-03, PNorm = 180.3454, GNorm = 0.1839, lr_0 = 2.4421e-04
Loss = 6.0534e-03, PNorm = 180.3495, GNorm = 0.2470, lr_0 = 2.4405e-04
Loss = 6.3729e-03, PNorm = 180.3556, GNorm = 0.1410, lr_0 = 2.4388e-04
Loss = 9.3504e-03, PNorm = 180.3588, GNorm = 0.0757, lr_0 = 2.4371e-04
Loss = 4.7157e-03, PNorm = 180.3632, GNorm = 0.1772, lr_0 = 2.4354e-04
Loss = 4.7871e-03, PNorm = 180.3687, GNorm = 0.2351, lr_0 = 2.4338e-04
Loss = 1.1517e-02, PNorm = 180.3734, GNorm = 0.1701, lr_0 = 2.4321e-04
Loss = 6.7910e-03, PNorm = 180.3789, GNorm = 0.2620, lr_0 = 2.4304e-04
Loss = 9.7178e-03, PNorm = 180.3867, GNorm = 0.2022, lr_0 = 2.4288e-04
Loss = 5.9423e-03, PNorm = 180.3929, GNorm = 0.1241, lr_0 = 2.4271e-04
Loss = 5.1426e-03, PNorm = 180.3991, GNorm = 0.1508, lr_0 = 2.4254e-04
Loss = 7.0518e-03, PNorm = 180.4043, GNorm = 0.1290, lr_0 = 2.4238e-04
Loss = 6.0543e-03, PNorm = 180.4087, GNorm = 0.1936, lr_0 = 2.4221e-04
Loss = 5.7393e-03, PNorm = 180.4134, GNorm = 0.2301, lr_0 = 2.4205e-04
Loss = 5.8064e-03, PNorm = 180.4185, GNorm = 0.1525, lr_0 = 2.4188e-04
Loss = 9.5112e-03, PNorm = 180.4236, GNorm = 0.2782, lr_0 = 2.4171e-04
Loss = 5.5236e-03, PNorm = 180.4306, GNorm = 0.3120, lr_0 = 2.4155e-04
Loss = 7.5720e-03, PNorm = 180.4380, GNorm = 0.1034, lr_0 = 2.4138e-04
Loss = 5.1873e-03, PNorm = 180.4450, GNorm = 0.1159, lr_0 = 2.4122e-04
Loss = 5.6732e-03, PNorm = 180.4507, GNorm = 0.1942, lr_0 = 2.4105e-04
Loss = 1.0145e-02, PNorm = 180.4563, GNorm = 0.1622, lr_0 = 2.4089e-04
Loss = 6.3022e-03, PNorm = 180.4622, GNorm = 0.4510, lr_0 = 2.4072e-04
Loss = 5.6384e-03, PNorm = 180.4651, GNorm = 0.1839, lr_0 = 2.4056e-04
Loss = 6.1437e-03, PNorm = 180.4701, GNorm = 0.1465, lr_0 = 2.4039e-04
Loss = 9.8208e-03, PNorm = 180.4779, GNorm = 0.1227, lr_0 = 2.4023e-04
Loss = 6.3707e-03, PNorm = 180.4862, GNorm = 0.2239, lr_0 = 2.4006e-04
Loss = 5.7061e-03, PNorm = 180.4923, GNorm = 0.0911, lr_0 = 2.3990e-04
Loss = 9.4455e-03, PNorm = 180.4988, GNorm = 0.1934, lr_0 = 2.3974e-04
Loss = 7.2459e-03, PNorm = 180.5048, GNorm = 0.1470, lr_0 = 2.3957e-04
Loss = 4.6441e-03, PNorm = 180.5117, GNorm = 0.1959, lr_0 = 2.3941e-04
Loss = 7.6725e-03, PNorm = 180.5197, GNorm = 0.2712, lr_0 = 2.3924e-04
Loss = 7.6952e-03, PNorm = 180.5265, GNorm = 0.2024, lr_0 = 2.3908e-04
Loss = 7.9541e-03, PNorm = 180.5354, GNorm = 0.1991, lr_0 = 2.3892e-04
Loss = 6.8000e-03, PNorm = 180.5433, GNorm = 0.3255, lr_0 = 2.3875e-04
Loss = 5.4685e-03, PNorm = 180.5497, GNorm = 0.1955, lr_0 = 2.3859e-04
Loss = 4.8230e-03, PNorm = 180.5552, GNorm = 0.1002, lr_0 = 2.3842e-04
Loss = 5.1076e-03, PNorm = 180.5624, GNorm = 0.1077, lr_0 = 2.3826e-04
Loss = 5.9968e-03, PNorm = 180.5687, GNorm = 0.0926, lr_0 = 2.3810e-04
Loss = 5.2993e-03, PNorm = 180.5746, GNorm = 0.0926, lr_0 = 2.3794e-04
Loss = 9.4232e-03, PNorm = 180.5776, GNorm = 0.1561, lr_0 = 2.3777e-04
Loss = 8.9034e-03, PNorm = 180.5826, GNorm = 0.1533, lr_0 = 2.3761e-04
Loss = 9.4538e-03, PNorm = 180.5897, GNorm = 0.1679, lr_0 = 2.3745e-04
Loss = 6.3425e-03, PNorm = 180.5961, GNorm = 0.2743, lr_0 = 2.3728e-04
Loss = 6.0370e-03, PNorm = 180.6024, GNorm = 0.1270, lr_0 = 2.3712e-04
Loss = 4.6192e-03, PNorm = 180.6098, GNorm = 0.1080, lr_0 = 2.3696e-04
Loss = 1.0335e-02, PNorm = 180.6157, GNorm = 0.3235, lr_0 = 2.3680e-04
Loss = 5.7471e-03, PNorm = 180.6217, GNorm = 0.2657, lr_0 = 2.3663e-04
Loss = 1.1427e-02, PNorm = 180.6295, GNorm = 0.0747, lr_0 = 2.3647e-04
Loss = 1.0761e-02, PNorm = 180.6380, GNorm = 0.1225, lr_0 = 2.3631e-04
Loss = 8.5842e-03, PNorm = 180.6440, GNorm = 0.3849, lr_0 = 2.3615e-04
Loss = 1.0251e-02, PNorm = 180.6520, GNorm = 0.2002, lr_0 = 2.3599e-04
Loss = 1.1544e-02, PNorm = 180.6585, GNorm = 0.2454, lr_0 = 2.3582e-04
Loss = 6.6960e-03, PNorm = 180.6661, GNorm = 0.1727, lr_0 = 2.3566e-04
Loss = 1.0064e-02, PNorm = 180.6745, GNorm = 0.2187, lr_0 = 2.3550e-04
Loss = 7.8335e-03, PNorm = 180.6824, GNorm = 0.1604, lr_0 = 2.3534e-04
Loss = 5.3835e-03, PNorm = 180.6898, GNorm = 0.1362, lr_0 = 2.3518e-04
Loss = 7.3324e-03, PNorm = 180.6961, GNorm = 0.1821, lr_0 = 2.3502e-04
Loss = 9.8290e-03, PNorm = 180.7030, GNorm = 0.4804, lr_0 = 2.3486e-04
Loss = 1.1385e-02, PNorm = 180.7112, GNorm = 0.1077, lr_0 = 2.3470e-04
Loss = 4.7668e-03, PNorm = 180.7165, GNorm = 0.1169, lr_0 = 2.3454e-04
Loss = 6.7239e-03, PNorm = 180.7215, GNorm = 0.3043, lr_0 = 2.3437e-04
Loss = 4.9640e-03, PNorm = 180.7272, GNorm = 0.0967, lr_0 = 2.3421e-04
Loss = 6.2734e-03, PNorm = 180.7324, GNorm = 0.1545, lr_0 = 2.3405e-04
Loss = 7.4745e-03, PNorm = 180.7370, GNorm = 0.1552, lr_0 = 2.3389e-04
Loss = 5.2847e-03, PNorm = 180.7450, GNorm = 0.3489, lr_0 = 2.3373e-04
Loss = 1.0816e-02, PNorm = 180.7524, GNorm = 0.2623, lr_0 = 2.3357e-04
Loss = 2.2574e-02, PNorm = 180.7593, GNorm = 2.7608, lr_0 = 2.3341e-04
Loss = 9.2172e-03, PNorm = 180.7678, GNorm = 0.1717, lr_0 = 2.3325e-04
Loss = 4.9654e-03, PNorm = 180.7745, GNorm = 0.1503, lr_0 = 2.3309e-04
Loss = 1.1478e-02, PNorm = 180.7830, GNorm = 0.3928, lr_0 = 2.3293e-04
Loss = 5.2626e-03, PNorm = 180.7923, GNorm = 0.0876, lr_0 = 2.3277e-04
Loss = 6.0259e-03, PNorm = 180.7988, GNorm = 0.1443, lr_0 = 2.3261e-04
Loss = 1.4015e-02, PNorm = 180.8058, GNorm = 0.2462, lr_0 = 2.3246e-04
Loss = 1.0903e-02, PNorm = 180.8132, GNorm = 0.1257, lr_0 = 2.3230e-04
Loss = 5.0601e-03, PNorm = 180.8191, GNorm = 0.1237, lr_0 = 2.3214e-04
Loss = 6.1304e-03, PNorm = 180.8264, GNorm = 0.1267, lr_0 = 2.3198e-04
Loss = 6.3323e-03, PNorm = 180.8307, GNorm = 0.2114, lr_0 = 2.3182e-04
Loss = 5.3440e-03, PNorm = 180.8341, GNorm = 0.1237, lr_0 = 2.3166e-04
Loss = 6.2092e-03, PNorm = 180.8403, GNorm = 0.0969, lr_0 = 2.3150e-04
Loss = 4.9538e-03, PNorm = 180.8480, GNorm = 0.1202, lr_0 = 2.3134e-04
Loss = 4.8521e-03, PNorm = 180.8555, GNorm = 0.1957, lr_0 = 2.3118e-04
Loss = 4.6449e-03, PNorm = 180.8612, GNorm = 0.1050, lr_0 = 2.3103e-04
Loss = 7.0532e-03, PNorm = 180.8680, GNorm = 0.1424, lr_0 = 2.3087e-04
Loss = 5.1859e-03, PNorm = 180.8739, GNorm = 0.1558, lr_0 = 2.3071e-04
Loss = 6.6328e-03, PNorm = 180.8784, GNorm = 0.2538, lr_0 = 2.3055e-04
Loss = 5.4653e-03, PNorm = 180.8828, GNorm = 0.1929, lr_0 = 2.3039e-04
Loss = 5.1419e-03, PNorm = 180.8908, GNorm = 0.0742, lr_0 = 2.3024e-04
Loss = 5.1060e-03, PNorm = 180.8971, GNorm = 0.1353, lr_0 = 2.3008e-04
Loss = 7.2484e-03, PNorm = 180.9041, GNorm = 0.1916, lr_0 = 2.2992e-04
Loss = 6.2146e-03, PNorm = 180.9111, GNorm = 0.1006, lr_0 = 2.2976e-04
Loss = 8.1701e-03, PNorm = 180.9196, GNorm = 0.1156, lr_0 = 2.2961e-04
Loss = 4.5624e-03, PNorm = 180.9263, GNorm = 0.1959, lr_0 = 2.2945e-04
Loss = 1.0456e-02, PNorm = 180.9292, GNorm = 0.1753, lr_0 = 2.2929e-04
Loss = 9.1782e-03, PNorm = 180.9356, GNorm = 0.2127, lr_0 = 2.2913e-04
Loss = 7.5226e-03, PNorm = 180.9422, GNorm = 0.2025, lr_0 = 2.2898e-04
Loss = 5.3099e-03, PNorm = 180.9520, GNorm = 0.1712, lr_0 = 2.2882e-04
Loss = 1.2330e-02, PNorm = 180.9634, GNorm = 0.3073, lr_0 = 2.2866e-04
Loss = 1.2777e-02, PNorm = 180.9715, GNorm = 0.5116, lr_0 = 2.2851e-04
Loss = 6.4528e-03, PNorm = 180.9787, GNorm = 0.3695, lr_0 = 2.2835e-04
Loss = 5.4678e-03, PNorm = 180.9886, GNorm = 0.1585, lr_0 = 2.2819e-04
Loss = 6.0344e-03, PNorm = 180.9953, GNorm = 0.2247, lr_0 = 2.2804e-04
Loss = 8.9422e-03, PNorm = 181.0016, GNorm = 0.0958, lr_0 = 2.2788e-04
Loss = 5.2990e-03, PNorm = 181.0106, GNorm = 0.3217, lr_0 = 2.2773e-04
Loss = 6.2626e-03, PNorm = 181.0176, GNorm = 0.2259, lr_0 = 2.2757e-04
Validation mae = 0.120893
Epoch 20
Loss = 4.3586e-03, PNorm = 181.0244, GNorm = 0.1380, lr_0 = 2.2741e-04
Loss = 6.1797e-03, PNorm = 181.0307, GNorm = 0.2444, lr_0 = 2.2726e-04
Loss = 6.7664e-03, PNorm = 181.0351, GNorm = 0.2052, lr_0 = 2.2710e-04
Loss = 5.1041e-03, PNorm = 181.0405, GNorm = 0.1220, lr_0 = 2.2695e-04
Loss = 4.4911e-03, PNorm = 181.0455, GNorm = 0.2255, lr_0 = 2.2679e-04
Loss = 5.5815e-03, PNorm = 181.0504, GNorm = 0.0921, lr_0 = 2.2664e-04
Loss = 7.3351e-03, PNorm = 181.0551, GNorm = 0.0910, lr_0 = 2.2648e-04
Loss = 5.5734e-03, PNorm = 181.0603, GNorm = 0.1941, lr_0 = 2.2632e-04
Loss = 7.6734e-03, PNorm = 181.0668, GNorm = 0.1562, lr_0 = 2.2617e-04
Loss = 7.8585e-03, PNorm = 181.0723, GNorm = 0.1452, lr_0 = 2.2601e-04
Loss = 4.3905e-03, PNorm = 181.0773, GNorm = 0.1650, lr_0 = 2.2586e-04
Loss = 5.1620e-03, PNorm = 181.0832, GNorm = 0.1369, lr_0 = 2.2571e-04
Loss = 8.1706e-03, PNorm = 181.0884, GNorm = 0.2762, lr_0 = 2.2555e-04
Loss = 4.5035e-03, PNorm = 181.0954, GNorm = 0.1163, lr_0 = 2.2540e-04
Loss = 5.2248e-03, PNorm = 181.0978, GNorm = 0.1005, lr_0 = 2.2524e-04
Loss = 7.3163e-03, PNorm = 181.1020, GNorm = 0.1741, lr_0 = 2.2509e-04
Loss = 1.0917e-02, PNorm = 181.1087, GNorm = 0.4847, lr_0 = 2.2493e-04
Loss = 4.8578e-03, PNorm = 181.1150, GNorm = 0.2387, lr_0 = 2.2478e-04
Loss = 8.6839e-03, PNorm = 181.1206, GNorm = 0.5742, lr_0 = 2.2463e-04
Loss = 5.8691e-03, PNorm = 181.1247, GNorm = 0.7202, lr_0 = 2.2447e-04
Loss = 4.5642e-03, PNorm = 181.1290, GNorm = 0.0805, lr_0 = 2.2432e-04
Loss = 4.7833e-03, PNorm = 181.1320, GNorm = 0.1178, lr_0 = 2.2416e-04
Loss = 5.4482e-03, PNorm = 181.1364, GNorm = 0.1617, lr_0 = 2.2401e-04
Loss = 4.3679e-03, PNorm = 181.1409, GNorm = 0.1195, lr_0 = 2.2386e-04
Loss = 7.2506e-03, PNorm = 181.1456, GNorm = 0.2616, lr_0 = 2.2370e-04
Loss = 6.1572e-03, PNorm = 181.1491, GNorm = 0.2042, lr_0 = 2.2355e-04
Loss = 8.8151e-03, PNorm = 181.1542, GNorm = 0.1036, lr_0 = 2.2340e-04
Loss = 7.6081e-03, PNorm = 181.1572, GNorm = 0.1294, lr_0 = 2.2324e-04
Loss = 4.8898e-03, PNorm = 181.1617, GNorm = 0.1525, lr_0 = 2.2309e-04
Loss = 1.5298e-02, PNorm = 181.1716, GNorm = 0.3631, lr_0 = 2.2294e-04
Loss = 6.7113e-03, PNorm = 181.1756, GNorm = 0.1806, lr_0 = 2.2279e-04
Loss = 8.5132e-03, PNorm = 181.1816, GNorm = 0.2943, lr_0 = 2.2263e-04
Loss = 4.3914e-03, PNorm = 181.1894, GNorm = 0.1034, lr_0 = 2.2248e-04
Loss = 4.7124e-03, PNorm = 181.1941, GNorm = 0.0978, lr_0 = 2.2233e-04
Loss = 6.5901e-03, PNorm = 181.2008, GNorm = 0.1271, lr_0 = 2.2218e-04
Loss = 5.9232e-03, PNorm = 181.2058, GNorm = 0.1749, lr_0 = 2.2202e-04
Loss = 4.0679e-03, PNorm = 181.2104, GNorm = 0.1911, lr_0 = 2.2187e-04
Loss = 5.6445e-03, PNorm = 181.2165, GNorm = 0.1594, lr_0 = 2.2172e-04
Loss = 6.0068e-03, PNorm = 181.2225, GNorm = 0.1263, lr_0 = 2.2157e-04
Loss = 5.6050e-03, PNorm = 181.2272, GNorm = 0.1841, lr_0 = 2.2142e-04
Loss = 9.5053e-03, PNorm = 181.2332, GNorm = 0.3334, lr_0 = 2.2126e-04
Loss = 4.0608e-03, PNorm = 181.2394, GNorm = 0.1707, lr_0 = 2.2111e-04
Loss = 4.6815e-03, PNorm = 181.2442, GNorm = 0.2976, lr_0 = 2.2096e-04
Loss = 4.1376e-03, PNorm = 181.2476, GNorm = 0.1665, lr_0 = 2.2081e-04
Loss = 6.2849e-03, PNorm = 181.2514, GNorm = 0.1402, lr_0 = 2.2066e-04
Loss = 3.8530e-03, PNorm = 181.2563, GNorm = 0.1242, lr_0 = 2.2051e-04
Loss = 6.5794e-03, PNorm = 181.2609, GNorm = 0.3464, lr_0 = 2.2036e-04
Loss = 3.8933e-03, PNorm = 181.2645, GNorm = 0.1634, lr_0 = 2.2021e-04
Loss = 3.6170e-03, PNorm = 181.2696, GNorm = 0.0778, lr_0 = 2.2005e-04
Loss = 6.5404e-03, PNorm = 181.2753, GNorm = 0.1674, lr_0 = 2.1990e-04
Loss = 4.5342e-03, PNorm = 181.2799, GNorm = 0.1153, lr_0 = 2.1975e-04
Loss = 4.5923e-03, PNorm = 181.2848, GNorm = 0.1244, lr_0 = 2.1960e-04
Loss = 5.3662e-03, PNorm = 181.2902, GNorm = 0.1111, lr_0 = 2.1945e-04
Loss = 5.5056e-03, PNorm = 181.2946, GNorm = 0.1287, lr_0 = 2.1930e-04
Loss = 8.7748e-03, PNorm = 181.3023, GNorm = 0.1217, lr_0 = 2.1915e-04
Loss = 3.6246e-03, PNorm = 181.3084, GNorm = 0.1633, lr_0 = 2.1900e-04
Loss = 3.7178e-03, PNorm = 181.3133, GNorm = 0.1421, lr_0 = 2.1885e-04
Loss = 5.3495e-03, PNorm = 181.3178, GNorm = 0.0956, lr_0 = 2.1870e-04
Loss = 5.0355e-03, PNorm = 181.3212, GNorm = 0.2426, lr_0 = 2.1855e-04
Loss = 4.2188e-03, PNorm = 181.3259, GNorm = 0.1590, lr_0 = 2.1840e-04
Loss = 8.2162e-03, PNorm = 181.3313, GNorm = 0.1267, lr_0 = 2.1825e-04
Loss = 4.3577e-03, PNorm = 181.3358, GNorm = 0.1052, lr_0 = 2.1810e-04
Loss = 4.4768e-03, PNorm = 181.3415, GNorm = 0.2754, lr_0 = 2.1795e-04
Loss = 5.9631e-03, PNorm = 181.3467, GNorm = 0.3059, lr_0 = 2.1780e-04
Loss = 7.5335e-03, PNorm = 181.3526, GNorm = 0.2743, lr_0 = 2.1765e-04
Loss = 5.4762e-03, PNorm = 181.3592, GNorm = 0.0850, lr_0 = 2.1751e-04
Loss = 4.6546e-03, PNorm = 181.3644, GNorm = 0.2285, lr_0 = 2.1736e-04
Loss = 9.0138e-03, PNorm = 181.3676, GNorm = 0.2179, lr_0 = 2.1721e-04
Loss = 4.8139e-03, PNorm = 181.3720, GNorm = 0.2433, lr_0 = 2.1706e-04
Loss = 5.1301e-03, PNorm = 181.3783, GNorm = 0.1432, lr_0 = 2.1691e-04
Loss = 7.3090e-03, PNorm = 181.3819, GNorm = 0.0932, lr_0 = 2.1676e-04
Loss = 5.2046e-03, PNorm = 181.3874, GNorm = 0.2064, lr_0 = 2.1661e-04
Loss = 7.1436e-03, PNorm = 181.3928, GNorm = 0.2641, lr_0 = 2.1646e-04
Loss = 5.5847e-03, PNorm = 181.3975, GNorm = 0.2570, lr_0 = 2.1632e-04
Loss = 1.6323e-02, PNorm = 181.4050, GNorm = 0.0713, lr_0 = 2.1617e-04
Loss = 7.2746e-03, PNorm = 181.4127, GNorm = 0.1221, lr_0 = 2.1602e-04
Loss = 5.7250e-03, PNorm = 181.4191, GNorm = 0.1229, lr_0 = 2.1587e-04
Loss = 5.0480e-03, PNorm = 181.4251, GNorm = 0.3451, lr_0 = 2.1572e-04
Loss = 7.2755e-03, PNorm = 181.4322, GNorm = 0.1341, lr_0 = 2.1558e-04
Loss = 6.2912e-03, PNorm = 181.4383, GNorm = 0.2038, lr_0 = 2.1543e-04
Loss = 1.5305e-02, PNorm = 181.4407, GNorm = 0.6041, lr_0 = 2.1528e-04
Loss = 5.0080e-03, PNorm = 181.4468, GNorm = 0.1400, lr_0 = 2.1513e-04
Loss = 6.1283e-03, PNorm = 181.4543, GNorm = 0.6121, lr_0 = 2.1499e-04
Loss = 5.5008e-03, PNorm = 181.4608, GNorm = 0.6938, lr_0 = 2.1484e-04
Loss = 7.1175e-03, PNorm = 181.4660, GNorm = 0.3982, lr_0 = 2.1469e-04
Loss = 3.6083e-03, PNorm = 181.4724, GNorm = 0.2396, lr_0 = 2.1454e-04
Loss = 7.7301e-03, PNorm = 181.4789, GNorm = 0.8183, lr_0 = 2.1440e-04
Loss = 5.7594e-03, PNorm = 181.4849, GNorm = 0.1816, lr_0 = 2.1425e-04
Loss = 4.4269e-03, PNorm = 181.4911, GNorm = 0.0927, lr_0 = 2.1410e-04
Loss = 3.6240e-03, PNorm = 181.4964, GNorm = 0.0867, lr_0 = 2.1396e-04
Loss = 5.7702e-03, PNorm = 181.5015, GNorm = 0.3043, lr_0 = 2.1381e-04
Loss = 6.7209e-03, PNorm = 181.5041, GNorm = 0.0910, lr_0 = 2.1366e-04
Loss = 1.2277e-02, PNorm = 181.5112, GNorm = 0.5870, lr_0 = 2.1352e-04
Loss = 6.3283e-03, PNorm = 181.5167, GNorm = 0.1288, lr_0 = 2.1337e-04
Loss = 7.6273e-03, PNorm = 181.5232, GNorm = 0.3592, lr_0 = 2.1323e-04
Loss = 1.4100e-02, PNorm = 181.5281, GNorm = 0.2684, lr_0 = 2.1308e-04
Loss = 1.1041e-02, PNorm = 181.5331, GNorm = 0.3176, lr_0 = 2.1293e-04
Loss = 3.7428e-03, PNorm = 181.5389, GNorm = 0.1172, lr_0 = 2.1279e-04
Loss = 6.2329e-03, PNorm = 181.5445, GNorm = 0.1387, lr_0 = 2.1264e-04
Loss = 5.6455e-03, PNorm = 181.5488, GNorm = 0.1646, lr_0 = 2.1250e-04
Loss = 9.9826e-03, PNorm = 181.5540, GNorm = 0.1842, lr_0 = 2.1235e-04
Loss = 5.5537e-03, PNorm = 181.5585, GNorm = 0.1121, lr_0 = 2.1221e-04
Loss = 4.7373e-03, PNorm = 181.5633, GNorm = 0.0963, lr_0 = 2.1206e-04
Loss = 7.3493e-03, PNorm = 181.5671, GNorm = 0.1509, lr_0 = 2.1191e-04
Loss = 4.8723e-03, PNorm = 181.5719, GNorm = 0.1916, lr_0 = 2.1177e-04
Loss = 5.2512e-03, PNorm = 181.5792, GNorm = 0.0604, lr_0 = 2.1162e-04
Loss = 6.7384e-03, PNorm = 181.5856, GNorm = 0.1715, lr_0 = 2.1148e-04
Loss = 3.6980e-03, PNorm = 181.5908, GNorm = 0.0863, lr_0 = 2.1133e-04
Loss = 4.2076e-03, PNorm = 181.5956, GNorm = 0.0988, lr_0 = 2.1119e-04
Loss = 7.7074e-03, PNorm = 181.6005, GNorm = 0.1024, lr_0 = 2.1104e-04
Loss = 9.7733e-03, PNorm = 181.6086, GNorm = 0.0880, lr_0 = 2.1090e-04
Loss = 5.7583e-03, PNorm = 181.6151, GNorm = 0.0922, lr_0 = 2.1076e-04
Loss = 8.6313e-03, PNorm = 181.6200, GNorm = 0.1872, lr_0 = 2.1061e-04
Loss = 5.7719e-03, PNorm = 181.6235, GNorm = 0.1070, lr_0 = 2.1047e-04
Loss = 4.1375e-03, PNorm = 181.6273, GNorm = 0.1157, lr_0 = 2.1032e-04
Loss = 5.7167e-03, PNorm = 181.6317, GNorm = 0.4129, lr_0 = 2.1018e-04
Loss = 4.6756e-03, PNorm = 181.6369, GNorm = 0.1448, lr_0 = 2.1003e-04
Loss = 8.3869e-03, PNorm = 181.6408, GNorm = 0.5611, lr_0 = 2.0989e-04
Loss = 7.4750e-03, PNorm = 181.6462, GNorm = 0.1670, lr_0 = 2.0975e-04
Loss = 8.0954e-03, PNorm = 181.6525, GNorm = 0.1604, lr_0 = 2.0960e-04
Validation mae = 0.120808
Epoch 21
Loss = 6.5209e-03, PNorm = 181.6573, GNorm = 0.1661, lr_0 = 2.0946e-04
Loss = 5.8354e-03, PNorm = 181.6601, GNorm = 0.2023, lr_0 = 2.0932e-04
Loss = 7.0658e-03, PNorm = 181.6645, GNorm = 0.2089, lr_0 = 2.0917e-04
Loss = 4.7771e-03, PNorm = 181.6693, GNorm = 0.0766, lr_0 = 2.0903e-04
Loss = 5.9833e-03, PNorm = 181.6741, GNorm = 0.0912, lr_0 = 2.0889e-04
Loss = 5.3810e-03, PNorm = 181.6779, GNorm = 0.1420, lr_0 = 2.0874e-04
Loss = 6.8638e-03, PNorm = 181.6824, GNorm = 0.1248, lr_0 = 2.0860e-04
Loss = 7.1970e-03, PNorm = 181.6887, GNorm = 0.1254, lr_0 = 2.0846e-04
Loss = 4.4414e-03, PNorm = 181.6937, GNorm = 0.2565, lr_0 = 2.0831e-04
Loss = 7.8958e-03, PNorm = 181.6990, GNorm = 0.4479, lr_0 = 2.0817e-04
Loss = 6.1216e-03, PNorm = 181.7048, GNorm = 0.0722, lr_0 = 2.0803e-04
Loss = 5.0062e-03, PNorm = 181.7096, GNorm = 0.1538, lr_0 = 2.0789e-04
Loss = 5.0508e-03, PNorm = 181.7148, GNorm = 0.1554, lr_0 = 2.0774e-04
Loss = 5.4562e-03, PNorm = 181.7197, GNorm = 0.3372, lr_0 = 2.0760e-04
Loss = 5.4259e-03, PNorm = 181.7232, GNorm = 0.1038, lr_0 = 2.0746e-04
Loss = 7.7449e-03, PNorm = 181.7265, GNorm = 0.2296, lr_0 = 2.0732e-04
Loss = 5.3254e-03, PNorm = 181.7307, GNorm = 0.1553, lr_0 = 2.0718e-04
Loss = 4.2817e-03, PNorm = 181.7339, GNorm = 0.0688, lr_0 = 2.0703e-04
Loss = 3.8369e-03, PNorm = 181.7402, GNorm = 0.2167, lr_0 = 2.0689e-04
Loss = 4.2308e-03, PNorm = 181.7483, GNorm = 0.2955, lr_0 = 2.0675e-04
Loss = 3.9630e-03, PNorm = 181.7511, GNorm = 0.1035, lr_0 = 2.0661e-04
Loss = 4.1783e-03, PNorm = 181.7539, GNorm = 0.1784, lr_0 = 2.0647e-04
Loss = 3.8898e-03, PNorm = 181.7577, GNorm = 0.2753, lr_0 = 2.0633e-04
Loss = 4.1299e-03, PNorm = 181.7624, GNorm = 0.1628, lr_0 = 2.0618e-04
Loss = 7.5785e-03, PNorm = 181.7671, GNorm = 0.1669, lr_0 = 2.0604e-04
Loss = 6.9935e-03, PNorm = 181.7705, GNorm = 0.0693, lr_0 = 2.0590e-04
Loss = 6.4322e-03, PNorm = 181.7746, GNorm = 0.1383, lr_0 = 2.0576e-04
Loss = 3.6698e-03, PNorm = 181.7779, GNorm = 0.1838, lr_0 = 2.0562e-04
Loss = 5.4305e-03, PNorm = 181.7797, GNorm = 0.2163, lr_0 = 2.0548e-04
Loss = 6.4288e-03, PNorm = 181.7830, GNorm = 0.2007, lr_0 = 2.0534e-04
Loss = 7.7385e-03, PNorm = 181.7894, GNorm = 0.2141, lr_0 = 2.0520e-04
Loss = 4.6818e-03, PNorm = 181.7933, GNorm = 0.1699, lr_0 = 2.0506e-04
Loss = 5.2380e-03, PNorm = 181.7957, GNorm = 0.0728, lr_0 = 2.0492e-04
Loss = 6.6874e-03, PNorm = 181.8016, GNorm = 0.1278, lr_0 = 2.0478e-04
Loss = 3.9641e-03, PNorm = 181.8053, GNorm = 0.2187, lr_0 = 2.0464e-04
Loss = 4.0109e-03, PNorm = 181.8087, GNorm = 0.1892, lr_0 = 2.0450e-04
Loss = 4.0189e-03, PNorm = 181.8129, GNorm = 0.1069, lr_0 = 2.0436e-04
Loss = 5.8409e-03, PNorm = 181.8166, GNorm = 0.1468, lr_0 = 2.0422e-04
Loss = 4.8408e-03, PNorm = 181.8229, GNorm = 0.1190, lr_0 = 2.0408e-04
Loss = 4.3488e-03, PNorm = 181.8291, GNorm = 0.1683, lr_0 = 2.0394e-04
Loss = 7.3217e-03, PNorm = 181.8337, GNorm = 0.1327, lr_0 = 2.0380e-04
Loss = 5.1294e-03, PNorm = 181.8381, GNorm = 0.1110, lr_0 = 2.0366e-04
Loss = 9.0530e-03, PNorm = 181.8418, GNorm = 0.2027, lr_0 = 2.0352e-04
Loss = 6.8934e-03, PNorm = 181.8469, GNorm = 0.0903, lr_0 = 2.0338e-04
Loss = 8.3540e-03, PNorm = 181.8524, GNorm = 0.2027, lr_0 = 2.0324e-04
Loss = 4.2751e-03, PNorm = 181.8578, GNorm = 0.1764, lr_0 = 2.0310e-04
Loss = 5.7994e-03, PNorm = 181.8639, GNorm = 0.4006, lr_0 = 2.0296e-04
Loss = 5.2777e-03, PNorm = 181.8683, GNorm = 0.1906, lr_0 = 2.0282e-04
Loss = 6.9395e-03, PNorm = 181.8710, GNorm = 0.9726, lr_0 = 2.0268e-04
Loss = 3.5469e-03, PNorm = 181.8779, GNorm = 0.1470, lr_0 = 2.0254e-04
Loss = 4.2328e-03, PNorm = 181.8818, GNorm = 0.1346, lr_0 = 2.0240e-04
Loss = 4.3400e-03, PNorm = 181.8852, GNorm = 0.0914, lr_0 = 2.0227e-04
Loss = 5.1781e-03, PNorm = 181.8894, GNorm = 0.1707, lr_0 = 2.0213e-04
Loss = 8.8522e-03, PNorm = 181.8932, GNorm = 0.2561, lr_0 = 2.0199e-04
Loss = 6.0010e-03, PNorm = 181.8965, GNorm = 0.2949, lr_0 = 2.0185e-04
Loss = 3.5956e-03, PNorm = 181.9000, GNorm = 0.2078, lr_0 = 2.0171e-04
Loss = 4.7697e-03, PNorm = 181.9051, GNorm = 0.1368, lr_0 = 2.0157e-04
Loss = 5.8725e-03, PNorm = 181.9112, GNorm = 0.1870, lr_0 = 2.0144e-04
Loss = 8.2420e-03, PNorm = 181.9129, GNorm = 0.1829, lr_0 = 2.0130e-04
Loss = 3.6999e-03, PNorm = 181.9174, GNorm = 0.1410, lr_0 = 2.0116e-04
Loss = 6.4967e-03, PNorm = 181.9235, GNorm = 0.2035, lr_0 = 2.0102e-04
Loss = 3.4655e-03, PNorm = 181.9282, GNorm = 0.1157, lr_0 = 2.0088e-04
Loss = 6.4112e-03, PNorm = 181.9349, GNorm = 0.1196, lr_0 = 2.0075e-04
Loss = 9.3946e-03, PNorm = 181.9391, GNorm = 0.1736, lr_0 = 2.0061e-04
Loss = 3.9640e-03, PNorm = 181.9423, GNorm = 0.0887, lr_0 = 2.0047e-04
Loss = 5.5161e-03, PNorm = 181.9467, GNorm = 0.2449, lr_0 = 2.0033e-04
Loss = 4.8336e-03, PNorm = 181.9502, GNorm = 0.1897, lr_0 = 2.0020e-04
Loss = 3.9545e-03, PNorm = 181.9547, GNorm = 0.0786, lr_0 = 2.0006e-04
Loss = 6.3706e-03, PNorm = 181.9602, GNorm = 0.0862, lr_0 = 1.9992e-04
Loss = 3.9235e-03, PNorm = 181.9643, GNorm = 0.1818, lr_0 = 1.9979e-04
Loss = 1.1396e-02, PNorm = 181.9694, GNorm = 0.2001, lr_0 = 1.9965e-04
Loss = 3.2201e-03, PNorm = 181.9753, GNorm = 0.1712, lr_0 = 1.9951e-04
Loss = 6.8208e-03, PNorm = 181.9784, GNorm = 0.1166, lr_0 = 1.9938e-04
Loss = 4.7764e-03, PNorm = 181.9818, GNorm = 0.1526, lr_0 = 1.9924e-04
Loss = 4.7662e-03, PNorm = 181.9854, GNorm = 0.1282, lr_0 = 1.9910e-04
Loss = 4.0299e-03, PNorm = 181.9884, GNorm = 0.1618, lr_0 = 1.9897e-04
Loss = 4.8811e-03, PNorm = 181.9926, GNorm = 0.0962, lr_0 = 1.9883e-04
Loss = 3.5334e-03, PNorm = 181.9983, GNorm = 0.1159, lr_0 = 1.9869e-04
Loss = 5.7247e-03, PNorm = 182.0054, GNorm = 0.3225, lr_0 = 1.9856e-04
Loss = 4.1350e-03, PNorm = 182.0104, GNorm = 0.0835, lr_0 = 1.9842e-04
Loss = 2.9226e-03, PNorm = 182.0146, GNorm = 0.1578, lr_0 = 1.9829e-04
Loss = 3.6521e-03, PNorm = 182.0191, GNorm = 0.1060, lr_0 = 1.9815e-04
Loss = 2.3371e-02, PNorm = 182.0253, GNorm = 0.1078, lr_0 = 1.9801e-04
Loss = 6.3004e-03, PNorm = 182.0277, GNorm = 0.2592, lr_0 = 1.9788e-04
Loss = 7.2812e-03, PNorm = 182.0316, GNorm = 1.3750, lr_0 = 1.9774e-04
Loss = 6.0242e-03, PNorm = 182.0373, GNorm = 0.1339, lr_0 = 1.9761e-04
Loss = 4.0545e-03, PNorm = 182.0409, GNorm = 0.2016, lr_0 = 1.9747e-04
Loss = 4.9510e-03, PNorm = 182.0448, GNorm = 0.1559, lr_0 = 1.9734e-04
Loss = 4.2562e-03, PNorm = 182.0500, GNorm = 0.0852, lr_0 = 1.9720e-04
Loss = 3.5479e-03, PNorm = 182.0550, GNorm = 0.1526, lr_0 = 1.9707e-04
Loss = 3.4312e-03, PNorm = 182.0593, GNorm = 0.1940, lr_0 = 1.9693e-04
Loss = 3.6246e-03, PNorm = 182.0638, GNorm = 0.3164, lr_0 = 1.9680e-04
Loss = 9.4086e-03, PNorm = 182.0685, GNorm = 0.2321, lr_0 = 1.9666e-04
Loss = 6.3687e-03, PNorm = 182.0728, GNorm = 0.2122, lr_0 = 1.9653e-04
Loss = 1.0684e-02, PNorm = 182.0753, GNorm = 0.2702, lr_0 = 1.9639e-04
Loss = 3.8014e-03, PNorm = 182.0783, GNorm = 0.1521, lr_0 = 1.9626e-04
Loss = 8.7750e-03, PNorm = 182.0817, GNorm = 0.2571, lr_0 = 1.9612e-04
Loss = 4.5216e-03, PNorm = 182.0854, GNorm = 0.1217, lr_0 = 1.9599e-04
Loss = 4.7235e-03, PNorm = 182.0918, GNorm = 0.0994, lr_0 = 1.9585e-04
Loss = 5.9829e-03, PNorm = 182.0974, GNorm = 0.2846, lr_0 = 1.9572e-04
Loss = 1.0852e-02, PNorm = 182.1028, GNorm = 0.2157, lr_0 = 1.9559e-04
Loss = 3.6115e-03, PNorm = 182.1093, GNorm = 0.2746, lr_0 = 1.9545e-04
Loss = 7.5996e-03, PNorm = 182.1146, GNorm = 0.1956, lr_0 = 1.9532e-04
Loss = 7.4799e-03, PNorm = 182.1204, GNorm = 0.7035, lr_0 = 1.9518e-04
Loss = 4.0771e-03, PNorm = 182.1265, GNorm = 0.1245, lr_0 = 1.9505e-04
Loss = 4.1581e-03, PNorm = 182.1316, GNorm = 0.2088, lr_0 = 1.9492e-04
Loss = 6.6369e-03, PNorm = 182.1361, GNorm = 0.2359, lr_0 = 1.9478e-04
Loss = 9.3457e-03, PNorm = 182.1389, GNorm = 0.1781, lr_0 = 1.9465e-04
Loss = 6.3852e-03, PNorm = 182.1447, GNorm = 0.0810, lr_0 = 1.9452e-04
Loss = 3.2203e-03, PNorm = 182.1495, GNorm = 0.0967, lr_0 = 1.9438e-04
Loss = 6.1639e-03, PNorm = 182.1528, GNorm = 0.1224, lr_0 = 1.9425e-04
Loss = 4.2284e-03, PNorm = 182.1554, GNorm = 0.1987, lr_0 = 1.9412e-04
Loss = 3.8930e-03, PNorm = 182.1578, GNorm = 0.0688, lr_0 = 1.9398e-04
Loss = 3.5688e-03, PNorm = 182.1614, GNorm = 0.0722, lr_0 = 1.9385e-04
Loss = 5.0919e-03, PNorm = 182.1672, GNorm = 0.3421, lr_0 = 1.9372e-04
Loss = 5.6432e-03, PNorm = 182.1718, GNorm = 0.1915, lr_0 = 1.9359e-04
Loss = 6.2385e-03, PNorm = 182.1760, GNorm = 0.2307, lr_0 = 1.9345e-04
Loss = 6.4632e-03, PNorm = 182.1806, GNorm = 0.1990, lr_0 = 1.9332e-04
Loss = 4.2032e-03, PNorm = 182.1865, GNorm = 0.1447, lr_0 = 1.9319e-04
Loss = 8.1914e-03, PNorm = 182.1921, GNorm = 0.0948, lr_0 = 1.9306e-04
Validation mae = 0.120976
Epoch 22
Loss = 3.3920e-03, PNorm = 182.1962, GNorm = 0.1227, lr_0 = 1.9292e-04
Loss = 3.2290e-03, PNorm = 182.2016, GNorm = 0.0899, lr_0 = 1.9279e-04
Loss = 7.6228e-03, PNorm = 182.2040, GNorm = 0.1090, lr_0 = 1.9266e-04
Loss = 6.9639e-03, PNorm = 182.2050, GNorm = 0.1506, lr_0 = 1.9253e-04
Loss = 6.4589e-03, PNorm = 182.2091, GNorm = 0.1523, lr_0 = 1.9240e-04
Loss = 3.6980e-03, PNorm = 182.2106, GNorm = 0.1097, lr_0 = 1.9226e-04
Loss = 3.7970e-03, PNorm = 182.2140, GNorm = 0.0927, lr_0 = 1.9213e-04
Loss = 3.8860e-03, PNorm = 182.2186, GNorm = 0.1493, lr_0 = 1.9200e-04
Loss = 4.3251e-03, PNorm = 182.2209, GNorm = 0.1024, lr_0 = 1.9187e-04
Loss = 9.0542e-03, PNorm = 182.2221, GNorm = 0.3269, lr_0 = 1.9174e-04
Loss = 5.5400e-03, PNorm = 182.2262, GNorm = 0.2636, lr_0 = 1.9161e-04
Loss = 6.5751e-03, PNorm = 182.2333, GNorm = 0.1635, lr_0 = 1.9148e-04
Loss = 3.1591e-03, PNorm = 182.2392, GNorm = 0.0958, lr_0 = 1.9134e-04
Loss = 8.0170e-03, PNorm = 182.2475, GNorm = 0.1279, lr_0 = 1.9121e-04
Loss = 4.4061e-03, PNorm = 182.2532, GNorm = 0.3120, lr_0 = 1.9108e-04
Loss = 5.2247e-03, PNorm = 182.2590, GNorm = 0.1189, lr_0 = 1.9095e-04
Loss = 2.9565e-03, PNorm = 182.2647, GNorm = 0.1214, lr_0 = 1.9082e-04
Loss = 3.6342e-03, PNorm = 182.2682, GNorm = 0.1409, lr_0 = 1.9069e-04
Loss = 3.4742e-03, PNorm = 182.2715, GNorm = 0.2005, lr_0 = 1.9056e-04
Loss = 4.3680e-03, PNorm = 182.2719, GNorm = 0.1119, lr_0 = 1.9043e-04
Loss = 4.4910e-03, PNorm = 182.2737, GNorm = 0.1925, lr_0 = 1.9030e-04
Loss = 3.8163e-03, PNorm = 182.2783, GNorm = 0.0760, lr_0 = 1.9017e-04
Loss = 3.9284e-03, PNorm = 182.2819, GNorm = 0.1321, lr_0 = 1.9004e-04
Loss = 2.8913e-03, PNorm = 182.2854, GNorm = 0.0916, lr_0 = 1.8991e-04
Loss = 6.0457e-03, PNorm = 182.2901, GNorm = 0.0542, lr_0 = 1.8978e-04
Loss = 4.5098e-03, PNorm = 182.2922, GNorm = 0.0561, lr_0 = 1.8965e-04
Loss = 5.5208e-03, PNorm = 182.2936, GNorm = 0.1255, lr_0 = 1.8952e-04
Loss = 4.3123e-03, PNorm = 182.2965, GNorm = 0.1424, lr_0 = 1.8939e-04
Loss = 4.4858e-03, PNorm = 182.2987, GNorm = 0.0803, lr_0 = 1.8926e-04
Loss = 4.5440e-03, PNorm = 182.3008, GNorm = 0.1659, lr_0 = 1.8913e-04
Loss = 4.2289e-03, PNorm = 182.3036, GNorm = 0.1936, lr_0 = 1.8900e-04
Loss = 3.6387e-03, PNorm = 182.3059, GNorm = 0.1619, lr_0 = 1.8887e-04
Loss = 8.5585e-03, PNorm = 182.3081, GNorm = 0.0806, lr_0 = 1.8874e-04
Loss = 4.4378e-03, PNorm = 182.3123, GNorm = 0.1355, lr_0 = 1.8861e-04
Loss = 6.7294e-03, PNorm = 182.3164, GNorm = 0.4855, lr_0 = 1.8848e-04
Loss = 2.9993e-03, PNorm = 182.3218, GNorm = 0.1959, lr_0 = 1.8835e-04
Loss = 4.5681e-03, PNorm = 182.3271, GNorm = 0.2097, lr_0 = 1.8822e-04
Loss = 3.9352e-03, PNorm = 182.3296, GNorm = 0.1274, lr_0 = 1.8809e-04
Loss = 2.9381e-03, PNorm = 182.3331, GNorm = 0.0828, lr_0 = 1.8797e-04
Loss = 1.7321e-02, PNorm = 182.3377, GNorm = 0.4223, lr_0 = 1.8784e-04
Loss = 5.8561e-03, PNorm = 182.3386, GNorm = 0.1542, lr_0 = 1.8771e-04
Loss = 8.1871e-03, PNorm = 182.3424, GNorm = 0.1770, lr_0 = 1.8758e-04
Loss = 3.2507e-03, PNorm = 182.3481, GNorm = 0.0893, lr_0 = 1.8745e-04
Loss = 3.4243e-03, PNorm = 182.3533, GNorm = 0.0833, lr_0 = 1.8732e-04
Loss = 3.5474e-03, PNorm = 182.3585, GNorm = 0.1265, lr_0 = 1.8719e-04
Loss = 3.9165e-03, PNorm = 182.3629, GNorm = 0.2592, lr_0 = 1.8707e-04
Loss = 9.3139e-03, PNorm = 182.3662, GNorm = 0.1905, lr_0 = 1.8694e-04
Loss = 4.2645e-03, PNorm = 182.3685, GNorm = 0.0926, lr_0 = 1.8681e-04
Loss = 1.0263e-02, PNorm = 182.3683, GNorm = 0.0722, lr_0 = 1.8668e-04
Loss = 5.1466e-03, PNorm = 182.3693, GNorm = 0.0632, lr_0 = 1.8655e-04
Loss = 4.2985e-03, PNorm = 182.3716, GNorm = 0.1443, lr_0 = 1.8643e-04
Loss = 1.1565e-02, PNorm = 182.3758, GNorm = 0.0853, lr_0 = 1.8630e-04
Loss = 4.5183e-03, PNorm = 182.3805, GNorm = 0.1514, lr_0 = 1.8617e-04
Loss = 4.4609e-03, PNorm = 182.3849, GNorm = 0.0858, lr_0 = 1.8604e-04
Loss = 3.8985e-03, PNorm = 182.3910, GNorm = 0.1374, lr_0 = 1.8592e-04
Loss = 3.8944e-03, PNorm = 182.3966, GNorm = 0.1575, lr_0 = 1.8579e-04
Loss = 4.3300e-03, PNorm = 182.4005, GNorm = 0.0805, lr_0 = 1.8566e-04
Loss = 3.5347e-03, PNorm = 182.4045, GNorm = 0.0755, lr_0 = 1.8553e-04
Loss = 6.7244e-03, PNorm = 182.4101, GNorm = 0.2421, lr_0 = 1.8541e-04
Loss = 3.4732e-03, PNorm = 182.4136, GNorm = 0.1929, lr_0 = 1.8528e-04
Loss = 3.5060e-03, PNorm = 182.4177, GNorm = 0.1098, lr_0 = 1.8515e-04
Loss = 3.9983e-03, PNorm = 182.4204, GNorm = 0.2097, lr_0 = 1.8503e-04
Loss = 5.0473e-03, PNorm = 182.4233, GNorm = 0.6510, lr_0 = 1.8490e-04
Loss = 6.5902e-03, PNorm = 182.4257, GNorm = 0.1324, lr_0 = 1.8477e-04
Loss = 3.5586e-03, PNorm = 182.4283, GNorm = 0.0768, lr_0 = 1.8465e-04
Loss = 4.2776e-03, PNorm = 182.4327, GNorm = 0.0958, lr_0 = 1.8452e-04
Loss = 7.8932e-03, PNorm = 182.4369, GNorm = 0.1242, lr_0 = 1.8439e-04
Loss = 6.5017e-03, PNorm = 182.4409, GNorm = 0.1464, lr_0 = 1.8427e-04
Loss = 4.3684e-03, PNorm = 182.4440, GNorm = 0.1887, lr_0 = 1.8414e-04
Loss = 4.0127e-03, PNorm = 182.4484, GNorm = 0.1861, lr_0 = 1.8401e-04
Loss = 3.7397e-03, PNorm = 182.4529, GNorm = 0.0932, lr_0 = 1.8389e-04
Loss = 3.6237e-03, PNorm = 182.4569, GNorm = 0.1424, lr_0 = 1.8376e-04
Loss = 3.2919e-03, PNorm = 182.4595, GNorm = 0.1997, lr_0 = 1.8364e-04
Loss = 4.9882e-03, PNorm = 182.4642, GNorm = 0.1842, lr_0 = 1.8351e-04
Loss = 5.9650e-03, PNorm = 182.4682, GNorm = 0.2256, lr_0 = 1.8338e-04
Loss = 5.2296e-03, PNorm = 182.4729, GNorm = 0.1333, lr_0 = 1.8326e-04
Loss = 8.1082e-03, PNorm = 182.4759, GNorm = 0.2346, lr_0 = 1.8313e-04
Loss = 6.5164e-03, PNorm = 182.4797, GNorm = 0.1906, lr_0 = 1.8301e-04
Loss = 5.1907e-03, PNorm = 182.4847, GNorm = 0.1167, lr_0 = 1.8288e-04
Loss = 3.8070e-03, PNorm = 182.4891, GNorm = 0.1209, lr_0 = 1.8276e-04
Loss = 6.8231e-03, PNorm = 182.4941, GNorm = 0.0771, lr_0 = 1.8263e-04
Loss = 3.5971e-03, PNorm = 182.4989, GNorm = 0.1457, lr_0 = 1.8251e-04
Loss = 2.5321e-03, PNorm = 182.5013, GNorm = 0.0752, lr_0 = 1.8238e-04
Loss = 4.9973e-03, PNorm = 182.5033, GNorm = 0.1543, lr_0 = 1.8226e-04
Loss = 4.9346e-03, PNorm = 182.5064, GNorm = 0.1351, lr_0 = 1.8213e-04
Loss = 4.3333e-03, PNorm = 182.5119, GNorm = 0.0797, lr_0 = 1.8201e-04
Loss = 4.2072e-03, PNorm = 182.5172, GNorm = 0.1132, lr_0 = 1.8188e-04
Loss = 8.5576e-03, PNorm = 182.5200, GNorm = 0.2420, lr_0 = 1.8176e-04
Loss = 7.8994e-03, PNorm = 182.5248, GNorm = 0.0732, lr_0 = 1.8163e-04
Loss = 4.7577e-03, PNorm = 182.5308, GNorm = 0.1826, lr_0 = 1.8151e-04
Loss = 3.6681e-03, PNorm = 182.5372, GNorm = 0.1233, lr_0 = 1.8138e-04
Loss = 3.2040e-03, PNorm = 182.5416, GNorm = 0.1228, lr_0 = 1.8126e-04
Loss = 2.8558e-03, PNorm = 182.5465, GNorm = 0.1012, lr_0 = 1.8114e-04
Loss = 3.8267e-03, PNorm = 182.5504, GNorm = 0.1548, lr_0 = 1.8101e-04
Loss = 3.2063e-03, PNorm = 182.5535, GNorm = 0.1547, lr_0 = 1.8089e-04
Loss = 4.9785e-03, PNorm = 182.5571, GNorm = 0.1261, lr_0 = 1.8076e-04
Loss = 3.6611e-03, PNorm = 182.5596, GNorm = 0.1088, lr_0 = 1.8064e-04
Loss = 7.0409e-03, PNorm = 182.5599, GNorm = 0.2009, lr_0 = 1.8052e-04
Loss = 4.3393e-03, PNorm = 182.5619, GNorm = 0.0996, lr_0 = 1.8039e-04
Loss = 1.2997e-02, PNorm = 182.5655, GNorm = 0.2768, lr_0 = 1.8027e-04
Loss = 5.2988e-03, PNorm = 182.5682, GNorm = 0.0853, lr_0 = 1.8015e-04
Loss = 3.6629e-03, PNorm = 182.5707, GNorm = 0.1948, lr_0 = 1.8002e-04
Loss = 6.1005e-03, PNorm = 182.5743, GNorm = 0.9753, lr_0 = 1.7990e-04
Loss = 3.8295e-03, PNorm = 182.5787, GNorm = 0.0834, lr_0 = 1.7978e-04
Loss = 4.6267e-03, PNorm = 182.5833, GNorm = 0.3779, lr_0 = 1.7965e-04
Loss = 4.5837e-03, PNorm = 182.5870, GNorm = 0.1205, lr_0 = 1.7953e-04
Loss = 9.3498e-03, PNorm = 182.5918, GNorm = 0.1379, lr_0 = 1.7941e-04
Loss = 3.9268e-03, PNorm = 182.5974, GNorm = 0.1251, lr_0 = 1.7928e-04
Loss = 5.6257e-03, PNorm = 182.6023, GNorm = 0.3238, lr_0 = 1.7916e-04
Loss = 4.6279e-03, PNorm = 182.6048, GNorm = 0.0982, lr_0 = 1.7904e-04
Loss = 6.5010e-03, PNorm = 182.6104, GNorm = 0.1126, lr_0 = 1.7892e-04
Loss = 7.5838e-03, PNorm = 182.6164, GNorm = 0.0853, lr_0 = 1.7879e-04
Loss = 4.1135e-03, PNorm = 182.6215, GNorm = 0.2372, lr_0 = 1.7867e-04
Loss = 3.6209e-03, PNorm = 182.6274, GNorm = 0.0829, lr_0 = 1.7855e-04
Loss = 4.7999e-03, PNorm = 182.6322, GNorm = 0.2273, lr_0 = 1.7843e-04
Loss = 3.4168e-03, PNorm = 182.6370, GNorm = 0.1627, lr_0 = 1.7830e-04
Loss = 1.0354e-02, PNorm = 182.6416, GNorm = 0.2084, lr_0 = 1.7818e-04
Loss = 5.8272e-03, PNorm = 182.6456, GNorm = 0.1874, lr_0 = 1.7806e-04
Loss = 2.7309e-03, PNorm = 182.6485, GNorm = 0.0775, lr_0 = 1.7794e-04
Loss = 3.2345e-03, PNorm = 182.6503, GNorm = 0.0731, lr_0 = 1.7782e-04
Validation mae = 0.120748
Epoch 23
Loss = 2.2354e-03, PNorm = 182.6550, GNorm = 0.1081, lr_0 = 1.7769e-04
Loss = 3.2055e-03, PNorm = 182.6577, GNorm = 0.1245, lr_0 = 1.7757e-04
Loss = 2.7329e-03, PNorm = 182.6599, GNorm = 0.3010, lr_0 = 1.7745e-04
Loss = 7.6502e-03, PNorm = 182.6636, GNorm = 0.1175, lr_0 = 1.7733e-04
Loss = 5.6207e-03, PNorm = 182.6659, GNorm = 0.1455, lr_0 = 1.7721e-04
Loss = 3.2611e-03, PNorm = 182.6688, GNorm = 0.0854, lr_0 = 1.7709e-04
Loss = 2.9594e-03, PNorm = 182.6713, GNorm = 0.0959, lr_0 = 1.7696e-04
Loss = 2.5088e-03, PNorm = 182.6739, GNorm = 0.1124, lr_0 = 1.7684e-04
Loss = 2.8407e-03, PNorm = 182.6768, GNorm = 0.1366, lr_0 = 1.7672e-04
Loss = 3.5051e-03, PNorm = 182.6805, GNorm = 0.0816, lr_0 = 1.7660e-04
Loss = 3.5152e-03, PNorm = 182.6844, GNorm = 0.1185, lr_0 = 1.7648e-04
Loss = 4.4111e-03, PNorm = 182.6871, GNorm = 0.1709, lr_0 = 1.7636e-04
Loss = 2.3320e-03, PNorm = 182.6906, GNorm = 0.0687, lr_0 = 1.7624e-04
Loss = 9.4268e-03, PNorm = 182.6944, GNorm = 0.1520, lr_0 = 1.7612e-04
Loss = 4.2687e-03, PNorm = 182.6962, GNorm = 0.0826, lr_0 = 1.7600e-04
Loss = 2.8479e-03, PNorm = 182.6999, GNorm = 0.1038, lr_0 = 1.7588e-04
Loss = 2.9362e-03, PNorm = 182.7043, GNorm = 0.1418, lr_0 = 1.7576e-04
Loss = 3.5973e-03, PNorm = 182.7076, GNorm = 0.1310, lr_0 = 1.7564e-04
Loss = 2.7746e-03, PNorm = 182.7090, GNorm = 0.1196, lr_0 = 1.7552e-04
Loss = 2.5179e-03, PNorm = 182.7119, GNorm = 0.0708, lr_0 = 1.7540e-04
Loss = 2.4983e-03, PNorm = 182.7170, GNorm = 0.1244, lr_0 = 1.7528e-04
Loss = 2.5520e-03, PNorm = 182.7202, GNorm = 0.2529, lr_0 = 1.7516e-04
Loss = 1.7459e-02, PNorm = 182.7214, GNorm = 0.1228, lr_0 = 1.7504e-04
Loss = 4.6279e-03, PNorm = 182.7234, GNorm = 0.1260, lr_0 = 1.7492e-04
Loss = 3.1500e-03, PNorm = 182.7260, GNorm = 0.1040, lr_0 = 1.7480e-04
Loss = 2.9185e-03, PNorm = 182.7288, GNorm = 0.0940, lr_0 = 1.7468e-04
Loss = 6.6489e-03, PNorm = 182.7312, GNorm = 0.1053, lr_0 = 1.7456e-04
Loss = 4.9054e-03, PNorm = 182.7346, GNorm = 0.1399, lr_0 = 1.7444e-04
Loss = 4.4042e-03, PNorm = 182.7365, GNorm = 0.2171, lr_0 = 1.7432e-04
Loss = 3.6096e-03, PNorm = 182.7392, GNorm = 0.0673, lr_0 = 1.7420e-04
Loss = 3.4021e-03, PNorm = 182.7416, GNorm = 0.1490, lr_0 = 1.7408e-04
Loss = 4.4875e-03, PNorm = 182.7455, GNorm = 0.1180, lr_0 = 1.7396e-04
Loss = 5.2750e-03, PNorm = 182.7493, GNorm = 0.1235, lr_0 = 1.7384e-04
Loss = 3.9859e-03, PNorm = 182.7516, GNorm = 0.1331, lr_0 = 1.7372e-04
Loss = 5.7758e-03, PNorm = 182.7558, GNorm = 0.1263, lr_0 = 1.7360e-04
Loss = 3.7835e-03, PNorm = 182.7583, GNorm = 0.1032, lr_0 = 1.7348e-04
Loss = 2.0376e-03, PNorm = 182.7605, GNorm = 0.1365, lr_0 = 1.7336e-04
Loss = 8.7496e-03, PNorm = 182.7630, GNorm = 0.2150, lr_0 = 1.7325e-04
Loss = 2.2098e-03, PNorm = 182.7636, GNorm = 0.1213, lr_0 = 1.7313e-04
Loss = 2.3223e-03, PNorm = 182.7669, GNorm = 0.0824, lr_0 = 1.7301e-04
Loss = 4.9724e-03, PNorm = 182.7699, GNorm = 0.1014, lr_0 = 1.7289e-04
Loss = 2.8384e-03, PNorm = 182.7712, GNorm = 0.0914, lr_0 = 1.7277e-04
Loss = 4.4202e-03, PNorm = 182.7712, GNorm = 0.1542, lr_0 = 1.7265e-04
Loss = 6.5819e-03, PNorm = 182.7723, GNorm = 0.1360, lr_0 = 1.7253e-04
Loss = 8.5994e-03, PNorm = 182.7771, GNorm = 0.1272, lr_0 = 1.7242e-04
Loss = 4.3716e-03, PNorm = 182.7799, GNorm = 0.1919, lr_0 = 1.7230e-04
Loss = 4.7436e-03, PNorm = 182.7840, GNorm = 0.1544, lr_0 = 1.7218e-04
Loss = 4.6969e-03, PNorm = 182.7902, GNorm = 0.1360, lr_0 = 1.7206e-04
Loss = 5.3225e-03, PNorm = 182.7942, GNorm = 0.0631, lr_0 = 1.7194e-04
Loss = 7.1939e-03, PNorm = 182.7972, GNorm = 0.1003, lr_0 = 1.7183e-04
Loss = 3.1276e-03, PNorm = 182.7997, GNorm = 0.4523, lr_0 = 1.7171e-04
Loss = 4.3939e-03, PNorm = 182.8034, GNorm = 0.0684, lr_0 = 1.7159e-04
Loss = 3.8417e-03, PNorm = 182.8069, GNorm = 0.1242, lr_0 = 1.7147e-04
Loss = 3.0659e-03, PNorm = 182.8119, GNorm = 0.2436, lr_0 = 1.7136e-04
Loss = 6.2197e-03, PNorm = 182.8157, GNorm = 0.2699, lr_0 = 1.7124e-04
Loss = 2.1326e-03, PNorm = 182.8191, GNorm = 0.0817, lr_0 = 1.7112e-04
Loss = 2.0371e-03, PNorm = 182.8225, GNorm = 0.1152, lr_0 = 1.7100e-04
Loss = 7.0599e-03, PNorm = 182.8247, GNorm = 0.1273, lr_0 = 1.7089e-04
Loss = 5.9667e-03, PNorm = 182.8282, GNorm = 0.0993, lr_0 = 1.7077e-04
Loss = 8.8715e-03, PNorm = 182.8321, GNorm = 0.1977, lr_0 = 1.7065e-04
Loss = 4.2522e-03, PNorm = 182.8359, GNorm = 0.0733, lr_0 = 1.7054e-04
Loss = 3.4328e-03, PNorm = 182.8394, GNorm = 0.1174, lr_0 = 1.7042e-04
Loss = 3.1617e-03, PNorm = 182.8410, GNorm = 0.0674, lr_0 = 1.7030e-04
Loss = 4.0532e-03, PNorm = 182.8440, GNorm = 0.1819, lr_0 = 1.7019e-04
Loss = 2.4888e-03, PNorm = 182.8452, GNorm = 0.0761, lr_0 = 1.7007e-04
Loss = 5.6462e-03, PNorm = 182.8494, GNorm = 0.0979, lr_0 = 1.6995e-04
Loss = 5.2127e-03, PNorm = 182.8532, GNorm = 0.1893, lr_0 = 1.6984e-04
Loss = 6.2571e-03, PNorm = 182.8573, GNorm = 0.1280, lr_0 = 1.6972e-04
Loss = 7.3207e-03, PNorm = 182.8622, GNorm = 0.1275, lr_0 = 1.6960e-04
Loss = 2.8705e-03, PNorm = 182.8655, GNorm = 0.0836, lr_0 = 1.6949e-04
Loss = 5.0015e-03, PNorm = 182.8665, GNorm = 0.1321, lr_0 = 1.6937e-04
Loss = 3.1613e-03, PNorm = 182.8682, GNorm = 0.1042, lr_0 = 1.6926e-04
Loss = 2.5205e-03, PNorm = 182.8714, GNorm = 0.0813, lr_0 = 1.6914e-04
Loss = 3.6621e-03, PNorm = 182.8748, GNorm = 0.2731, lr_0 = 1.6902e-04
Loss = 4.9702e-03, PNorm = 182.8788, GNorm = 0.1000, lr_0 = 1.6891e-04
Loss = 2.0968e-02, PNorm = 182.8835, GNorm = 0.2418, lr_0 = 1.6879e-04
Loss = 4.7293e-03, PNorm = 182.8871, GNorm = 0.0799, lr_0 = 1.6868e-04
Loss = 5.1654e-03, PNorm = 182.8900, GNorm = 0.3030, lr_0 = 1.6856e-04
Loss = 2.8965e-03, PNorm = 182.8966, GNorm = 0.2211, lr_0 = 1.6845e-04
Loss = 4.0885e-03, PNorm = 182.9021, GNorm = 0.0831, lr_0 = 1.6833e-04
Loss = 2.3955e-03, PNorm = 182.9049, GNorm = 0.1574, lr_0 = 1.6821e-04
Loss = 3.7175e-03, PNorm = 182.9077, GNorm = 0.1411, lr_0 = 1.6810e-04
Loss = 5.6501e-03, PNorm = 182.9106, GNorm = 0.1033, lr_0 = 1.6798e-04
Loss = 2.2960e-03, PNorm = 182.9119, GNorm = 0.0853, lr_0 = 1.6787e-04
Loss = 5.0968e-03, PNorm = 182.9136, GNorm = 0.0995, lr_0 = 1.6775e-04
Loss = 5.3888e-03, PNorm = 182.9161, GNorm = 0.1587, lr_0 = 1.6764e-04
Loss = 2.7643e-03, PNorm = 182.9196, GNorm = 0.0838, lr_0 = 1.6752e-04
Loss = 3.3647e-03, PNorm = 182.9217, GNorm = 0.1744, lr_0 = 1.6741e-04
Loss = 8.4287e-03, PNorm = 182.9253, GNorm = 0.2979, lr_0 = 1.6729e-04
Loss = 7.1817e-03, PNorm = 182.9292, GNorm = 0.2137, lr_0 = 1.6718e-04
Loss = 2.3860e-03, PNorm = 182.9329, GNorm = 0.0894, lr_0 = 1.6707e-04
Loss = 6.1632e-03, PNorm = 182.9358, GNorm = 0.1542, lr_0 = 1.6695e-04
Loss = 8.7761e-03, PNorm = 182.9376, GNorm = 0.1335, lr_0 = 1.6684e-04
Loss = 2.7902e-03, PNorm = 182.9387, GNorm = 0.2286, lr_0 = 1.6672e-04
Loss = 3.4455e-03, PNorm = 182.9412, GNorm = 0.0912, lr_0 = 1.6661e-04
Loss = 4.4679e-03, PNorm = 182.9437, GNorm = 0.0684, lr_0 = 1.6649e-04
Loss = 6.7659e-03, PNorm = 182.9456, GNorm = 0.0581, lr_0 = 1.6638e-04
Loss = 5.1771e-03, PNorm = 182.9486, GNorm = 0.0943, lr_0 = 1.6627e-04
Loss = 4.2572e-03, PNorm = 182.9528, GNorm = 0.2095, lr_0 = 1.6615e-04
Loss = 2.7900e-03, PNorm = 182.9564, GNorm = 0.0867, lr_0 = 1.6604e-04
Loss = 3.1223e-03, PNorm = 182.9588, GNorm = 0.1607, lr_0 = 1.6592e-04
Loss = 2.9300e-03, PNorm = 182.9606, GNorm = 0.1380, lr_0 = 1.6581e-04
Loss = 3.6061e-03, PNorm = 182.9639, GNorm = 0.1750, lr_0 = 1.6570e-04
Loss = 4.2886e-03, PNorm = 182.9691, GNorm = 0.0804, lr_0 = 1.6558e-04
Loss = 4.0422e-03, PNorm = 182.9727, GNorm = 0.0714, lr_0 = 1.6547e-04
Loss = 3.9974e-03, PNorm = 182.9746, GNorm = 0.0807, lr_0 = 1.6536e-04
Loss = 6.8684e-03, PNorm = 182.9765, GNorm = 0.2571, lr_0 = 1.6524e-04
Loss = 6.7907e-03, PNorm = 182.9783, GNorm = 0.8800, lr_0 = 1.6513e-04
Loss = 5.0611e-03, PNorm = 182.9807, GNorm = 0.1215, lr_0 = 1.6502e-04
Loss = 3.3269e-03, PNorm = 182.9864, GNorm = 0.1962, lr_0 = 1.6490e-04
Loss = 6.8026e-03, PNorm = 182.9896, GNorm = 0.2518, lr_0 = 1.6479e-04
Loss = 5.2062e-03, PNorm = 182.9934, GNorm = 0.1067, lr_0 = 1.6468e-04
Loss = 2.7346e-03, PNorm = 182.9961, GNorm = 0.1598, lr_0 = 1.6457e-04
Loss = 3.2632e-03, PNorm = 182.9985, GNorm = 0.0989, lr_0 = 1.6445e-04
Loss = 2.9481e-03, PNorm = 183.0033, GNorm = 0.2019, lr_0 = 1.6434e-04
Loss = 8.2635e-03, PNorm = 183.0065, GNorm = 0.0773, lr_0 = 1.6423e-04
Loss = 5.4104e-03, PNorm = 183.0094, GNorm = 0.1025, lr_0 = 1.6412e-04
Loss = 3.9035e-03, PNorm = 183.0114, GNorm = 0.1101, lr_0 = 1.6400e-04
Loss = 2.5905e-03, PNorm = 183.0131, GNorm = 0.1143, lr_0 = 1.6389e-04
Loss = 2.6336e-03, PNorm = 183.0164, GNorm = 0.2540, lr_0 = 1.6378e-04
Validation mae = 0.120861
Epoch 24
Loss = 3.3255e-03, PNorm = 183.0195, GNorm = 0.1659, lr_0 = 1.6367e-04
Loss = 2.4007e-03, PNorm = 183.0219, GNorm = 0.1990, lr_0 = 1.6355e-04
Loss = 1.1629e-02, PNorm = 183.0242, GNorm = 0.1445, lr_0 = 1.6344e-04
Loss = 2.2852e-03, PNorm = 183.0263, GNorm = 0.0695, lr_0 = 1.6333e-04
Loss = 5.3247e-03, PNorm = 183.0286, GNorm = 0.0722, lr_0 = 1.6322e-04
Loss = 3.3673e-03, PNorm = 183.0309, GNorm = 0.3591, lr_0 = 1.6311e-04
Loss = 5.2135e-03, PNorm = 183.0315, GNorm = 0.2170, lr_0 = 1.6299e-04
Loss = 3.3435e-03, PNorm = 183.0338, GNorm = 0.0870, lr_0 = 1.6288e-04
Loss = 4.9428e-03, PNorm = 183.0370, GNorm = 0.3047, lr_0 = 1.6277e-04
Loss = 6.1872e-03, PNorm = 183.0393, GNorm = 0.0540, lr_0 = 1.6266e-04
Loss = 8.6673e-03, PNorm = 183.0412, GNorm = 0.1043, lr_0 = 1.6255e-04
Loss = 2.1717e-03, PNorm = 183.0425, GNorm = 0.0963, lr_0 = 1.6244e-04
Loss = 4.7816e-03, PNorm = 183.0446, GNorm = 0.2108, lr_0 = 1.6233e-04
Loss = 5.2694e-03, PNorm = 183.0474, GNorm = 0.1399, lr_0 = 1.6221e-04
Loss = 1.8172e-03, PNorm = 183.0500, GNorm = 0.1262, lr_0 = 1.6210e-04
Loss = 2.7925e-03, PNorm = 183.0512, GNorm = 0.1177, lr_0 = 1.6199e-04
Loss = 5.6528e-03, PNorm = 183.0531, GNorm = 0.1486, lr_0 = 1.6188e-04
Loss = 2.3345e-03, PNorm = 183.0548, GNorm = 0.0930, lr_0 = 1.6177e-04
Loss = 3.0407e-03, PNorm = 183.0564, GNorm = 0.0967, lr_0 = 1.6166e-04
Loss = 2.0805e-03, PNorm = 183.0590, GNorm = 0.1104, lr_0 = 1.6155e-04
Loss = 2.0177e-03, PNorm = 183.0616, GNorm = 0.1214, lr_0 = 1.6144e-04
Loss = 4.9163e-03, PNorm = 183.0632, GNorm = 0.1314, lr_0 = 1.6133e-04
Loss = 2.8842e-03, PNorm = 183.0664, GNorm = 0.0896, lr_0 = 1.6122e-04
Loss = 2.0763e-03, PNorm = 183.0696, GNorm = 0.0922, lr_0 = 1.6111e-04
Loss = 2.2826e-03, PNorm = 183.0730, GNorm = 0.0401, lr_0 = 1.6100e-04
Loss = 2.3101e-03, PNorm = 183.0761, GNorm = 0.1351, lr_0 = 1.6089e-04
Loss = 3.1720e-03, PNorm = 183.0791, GNorm = 0.1545, lr_0 = 1.6078e-04
Loss = 2.3189e-03, PNorm = 183.0807, GNorm = 0.1269, lr_0 = 1.6067e-04
Loss = 4.8563e-03, PNorm = 183.0850, GNorm = 0.3718, lr_0 = 1.6056e-04
Loss = 5.7192e-03, PNorm = 183.0880, GNorm = 0.2100, lr_0 = 1.6045e-04
Loss = 3.9146e-03, PNorm = 183.0915, GNorm = 0.2431, lr_0 = 1.6034e-04
Loss = 2.5966e-03, PNorm = 183.0951, GNorm = 0.4013, lr_0 = 1.6023e-04
Loss = 2.1105e-03, PNorm = 183.0979, GNorm = 0.1115, lr_0 = 1.6012e-04
Loss = 5.5574e-03, PNorm = 183.0998, GNorm = 0.1881, lr_0 = 1.6001e-04
Loss = 5.4921e-03, PNorm = 183.1021, GNorm = 0.1066, lr_0 = 1.5990e-04
Loss = 4.1700e-03, PNorm = 183.1057, GNorm = 0.2766, lr_0 = 1.5979e-04
Loss = 4.7567e-03, PNorm = 183.1103, GNorm = 0.1083, lr_0 = 1.5968e-04
Loss = 8.4288e-03, PNorm = 183.1138, GNorm = 0.1646, lr_0 = 1.5957e-04
Loss = 2.2465e-03, PNorm = 183.1166, GNorm = 0.0641, lr_0 = 1.5946e-04
Loss = 3.5964e-03, PNorm = 183.1200, GNorm = 0.1356, lr_0 = 1.5935e-04
Loss = 2.1212e-03, PNorm = 183.1236, GNorm = 0.1349, lr_0 = 1.5924e-04
Loss = 3.4313e-03, PNorm = 183.1256, GNorm = 0.0720, lr_0 = 1.5913e-04
Loss = 4.0872e-03, PNorm = 183.1276, GNorm = 0.3137, lr_0 = 1.5902e-04
Loss = 2.5081e-03, PNorm = 183.1312, GNorm = 0.0890, lr_0 = 1.5891e-04
Loss = 7.5419e-03, PNorm = 183.1341, GNorm = 0.1143, lr_0 = 1.5880e-04
Loss = 3.0096e-03, PNorm = 183.1361, GNorm = 0.1355, lr_0 = 1.5870e-04
Loss = 3.6326e-03, PNorm = 183.1391, GNorm = 0.1697, lr_0 = 1.5859e-04
Loss = 2.5134e-03, PNorm = 183.1434, GNorm = 0.1179, lr_0 = 1.5848e-04
Loss = 7.3827e-03, PNorm = 183.1474, GNorm = 0.1527, lr_0 = 1.5837e-04
Loss = 3.7244e-03, PNorm = 183.1490, GNorm = 0.4813, lr_0 = 1.5826e-04
Loss = 3.0343e-03, PNorm = 183.1525, GNorm = 0.0763, lr_0 = 1.5815e-04
Loss = 2.0954e-03, PNorm = 183.1554, GNorm = 0.1443, lr_0 = 1.5804e-04
Loss = 2.2036e-03, PNorm = 183.1571, GNorm = 0.1350, lr_0 = 1.5794e-04
Loss = 2.9204e-03, PNorm = 183.1602, GNorm = 0.1699, lr_0 = 1.5783e-04
Loss = 7.6131e-03, PNorm = 183.1620, GNorm = 0.1958, lr_0 = 1.5772e-04
Loss = 2.3973e-03, PNorm = 183.1646, GNorm = 0.2171, lr_0 = 1.5761e-04
Loss = 3.4902e-03, PNorm = 183.1677, GNorm = 0.0799, lr_0 = 1.5750e-04
Loss = 5.2903e-03, PNorm = 183.1698, GNorm = 0.3315, lr_0 = 1.5740e-04
Loss = 2.7045e-03, PNorm = 183.1706, GNorm = 0.2374, lr_0 = 1.5729e-04
Loss = 4.9327e-03, PNorm = 183.1722, GNorm = 0.1195, lr_0 = 1.5718e-04
Loss = 2.5039e-03, PNorm = 183.1747, GNorm = 0.1382, lr_0 = 1.5707e-04
Loss = 2.5817e-03, PNorm = 183.1768, GNorm = 0.0988, lr_0 = 1.5697e-04
Loss = 3.2297e-03, PNorm = 183.1799, GNorm = 0.0827, lr_0 = 1.5686e-04
Loss = 5.4674e-03, PNorm = 183.1812, GNorm = 0.1137, lr_0 = 1.5675e-04
Loss = 5.8368e-03, PNorm = 183.1841, GNorm = 0.1179, lr_0 = 1.5664e-04
Loss = 5.8953e-03, PNorm = 183.1881, GNorm = 0.1023, lr_0 = 1.5654e-04
Loss = 2.6236e-03, PNorm = 183.1904, GNorm = 0.1576, lr_0 = 1.5643e-04
Loss = 2.7679e-03, PNorm = 183.1925, GNorm = 0.1573, lr_0 = 1.5632e-04
Loss = 2.2164e-03, PNorm = 183.1952, GNorm = 0.1354, lr_0 = 1.5621e-04
Loss = 5.3420e-03, PNorm = 183.1988, GNorm = 0.2148, lr_0 = 1.5611e-04
Loss = 5.3296e-03, PNorm = 183.2014, GNorm = 0.0622, lr_0 = 1.5600e-04
Loss = 2.0775e-03, PNorm = 183.2042, GNorm = 0.0619, lr_0 = 1.5589e-04
Loss = 5.1154e-03, PNorm = 183.2079, GNorm = 0.3282, lr_0 = 1.5579e-04
Loss = 4.5110e-03, PNorm = 183.2095, GNorm = 0.1600, lr_0 = 1.5568e-04
Loss = 2.5393e-03, PNorm = 183.2124, GNorm = 0.1338, lr_0 = 1.5557e-04
Loss = 2.2826e-03, PNorm = 183.2154, GNorm = 0.1159, lr_0 = 1.5547e-04
Loss = 4.5406e-03, PNorm = 183.2180, GNorm = 0.1351, lr_0 = 1.5536e-04
Loss = 3.7247e-03, PNorm = 183.2209, GNorm = 0.0728, lr_0 = 1.5525e-04
Loss = 7.7707e-03, PNorm = 183.2242, GNorm = 0.1466, lr_0 = 1.5515e-04
Loss = 2.9562e-03, PNorm = 183.2269, GNorm = 0.1853, lr_0 = 1.5504e-04
Loss = 4.3063e-03, PNorm = 183.2285, GNorm = 0.1021, lr_0 = 1.5493e-04
Loss = 1.9478e-02, PNorm = 183.2317, GNorm = 0.0636, lr_0 = 1.5483e-04
Loss = 2.4983e-03, PNorm = 183.2322, GNorm = 0.1477, lr_0 = 1.5472e-04
Loss = 2.6184e-03, PNorm = 183.2362, GNorm = 0.0806, lr_0 = 1.5462e-04
Loss = 2.8104e-03, PNorm = 183.2391, GNorm = 0.1372, lr_0 = 1.5451e-04
Loss = 2.7117e-03, PNorm = 183.2415, GNorm = 0.1391, lr_0 = 1.5440e-04
Loss = 2.1510e-03, PNorm = 183.2448, GNorm = 0.1389, lr_0 = 1.5430e-04
Loss = 2.5652e-03, PNorm = 183.2489, GNorm = 0.1011, lr_0 = 1.5419e-04
Loss = 2.0423e-03, PNorm = 183.2506, GNorm = 0.0849, lr_0 = 1.5409e-04
Loss = 6.2470e-03, PNorm = 183.2528, GNorm = 0.1383, lr_0 = 1.5398e-04
Loss = 3.4722e-03, PNorm = 183.2566, GNorm = 0.1137, lr_0 = 1.5388e-04
Loss = 5.1882e-03, PNorm = 183.2603, GNorm = 0.0859, lr_0 = 1.5377e-04
Loss = 2.6059e-03, PNorm = 183.2632, GNorm = 0.1511, lr_0 = 1.5367e-04
Loss = 5.6798e-03, PNorm = 183.2671, GNorm = 0.3848, lr_0 = 1.5356e-04
Loss = 3.4485e-03, PNorm = 183.2701, GNorm = 0.0735, lr_0 = 1.5346e-04
Loss = 1.0927e-02, PNorm = 183.2719, GNorm = 0.2129, lr_0 = 1.5335e-04
Loss = 4.1643e-03, PNorm = 183.2738, GNorm = 0.1976, lr_0 = 1.5325e-04
Loss = 2.1670e-03, PNorm = 183.2762, GNorm = 0.1101, lr_0 = 1.5314e-04
Loss = 4.4983e-03, PNorm = 183.2782, GNorm = 0.1668, lr_0 = 1.5304e-04
Loss = 4.3564e-03, PNorm = 183.2801, GNorm = 0.0729, lr_0 = 1.5293e-04
Loss = 6.1744e-03, PNorm = 183.2824, GNorm = 0.1052, lr_0 = 1.5283e-04
Loss = 5.2195e-03, PNorm = 183.2857, GNorm = 0.0764, lr_0 = 1.5272e-04
Loss = 2.6583e-03, PNorm = 183.2886, GNorm = 0.0881, lr_0 = 1.5262e-04
Loss = 1.8764e-03, PNorm = 183.2912, GNorm = 0.1235, lr_0 = 1.5251e-04
Loss = 4.7411e-03, PNorm = 183.2933, GNorm = 0.4708, lr_0 = 1.5241e-04
Loss = 4.3424e-03, PNorm = 183.2980, GNorm = 0.1106, lr_0 = 1.5230e-04
Loss = 4.6396e-03, PNorm = 183.3015, GNorm = 0.0813, lr_0 = 1.5220e-04
Loss = 2.7372e-03, PNorm = 183.3044, GNorm = 0.1090, lr_0 = 1.5209e-04
Loss = 6.6686e-03, PNorm = 183.3076, GNorm = 0.1654, lr_0 = 1.5199e-04
Loss = 5.9226e-03, PNorm = 183.3104, GNorm = 0.1449, lr_0 = 1.5189e-04
Loss = 3.4656e-03, PNorm = 183.3130, GNorm = 0.3419, lr_0 = 1.5178e-04
Loss = 4.4781e-03, PNorm = 183.3137, GNorm = 0.0739, lr_0 = 1.5168e-04
Loss = 1.2331e-02, PNorm = 183.3158, GNorm = 0.0990, lr_0 = 1.5157e-04
Loss = 4.1101e-03, PNorm = 183.3184, GNorm = 0.0751, lr_0 = 1.5147e-04
Loss = 2.4802e-03, PNorm = 183.3205, GNorm = 0.2095, lr_0 = 1.5137e-04
Loss = 4.3912e-03, PNorm = 183.3237, GNorm = 0.1242, lr_0 = 1.5126e-04
Loss = 3.7294e-03, PNorm = 183.3287, GNorm = 0.1752, lr_0 = 1.5116e-04
Loss = 7.3436e-03, PNorm = 183.3316, GNorm = 0.1606, lr_0 = 1.5106e-04
Loss = 2.5201e-03, PNorm = 183.3341, GNorm = 0.0993, lr_0 = 1.5095e-04
Loss = 2.9294e-03, PNorm = 183.3345, GNorm = 0.0543, lr_0 = 1.5085e-04
Validation mae = 0.120947
Epoch 25
Loss = 2.5837e-03, PNorm = 183.3375, GNorm = 0.1647, lr_0 = 1.5075e-04
Loss = 2.4034e-03, PNorm = 183.3396, GNorm = 0.2048, lr_0 = 1.5064e-04
Loss = 5.2509e-03, PNorm = 183.3401, GNorm = 0.1497, lr_0 = 1.5054e-04
Loss = 2.1584e-03, PNorm = 183.3427, GNorm = 0.1124, lr_0 = 1.5044e-04
Loss = 5.4049e-03, PNorm = 183.3454, GNorm = 0.2591, lr_0 = 1.5033e-04
Loss = 2.8582e-03, PNorm = 183.3479, GNorm = 0.0854, lr_0 = 1.5023e-04
Loss = 2.1918e-03, PNorm = 183.3511, GNorm = 0.1599, lr_0 = 1.5013e-04
Loss = 2.6684e-03, PNorm = 183.3530, GNorm = 0.1012, lr_0 = 1.5002e-04
Loss = 3.1445e-03, PNorm = 183.3551, GNorm = 0.0452, lr_0 = 1.4992e-04
Loss = 2.0750e-03, PNorm = 183.3573, GNorm = 0.1064, lr_0 = 1.4982e-04
Loss = 1.5679e-03, PNorm = 183.3589, GNorm = 0.1048, lr_0 = 1.4972e-04
Loss = 1.9242e-03, PNorm = 183.3604, GNorm = 0.2444, lr_0 = 1.4961e-04
Loss = 2.3700e-03, PNorm = 183.3624, GNorm = 0.1541, lr_0 = 1.4951e-04
Loss = 2.2976e-03, PNorm = 183.3647, GNorm = 0.1985, lr_0 = 1.4941e-04
Loss = 2.2563e-03, PNorm = 183.3665, GNorm = 0.1160, lr_0 = 1.4931e-04
Loss = 3.7419e-03, PNorm = 183.3668, GNorm = 0.1154, lr_0 = 1.4920e-04
Loss = 4.8674e-03, PNorm = 183.3695, GNorm = 0.0798, lr_0 = 1.4910e-04
Loss = 8.7503e-03, PNorm = 183.3708, GNorm = 0.1156, lr_0 = 1.4900e-04
Loss = 2.5764e-03, PNorm = 183.3728, GNorm = 0.2213, lr_0 = 1.4890e-04
Loss = 3.2115e-03, PNorm = 183.3766, GNorm = 0.1901, lr_0 = 1.4880e-04
Loss = 5.0649e-03, PNorm = 183.3792, GNorm = 0.1197, lr_0 = 1.4869e-04
Loss = 4.0333e-03, PNorm = 183.3829, GNorm = 0.1082, lr_0 = 1.4859e-04
Loss = 2.4128e-03, PNorm = 183.3876, GNorm = 0.1651, lr_0 = 1.4849e-04
Loss = 2.0354e-03, PNorm = 183.3905, GNorm = 0.1965, lr_0 = 1.4839e-04
Loss = 7.6346e-03, PNorm = 183.3947, GNorm = 0.1773, lr_0 = 1.4829e-04
Loss = 3.6871e-03, PNorm = 183.3968, GNorm = 0.0857, lr_0 = 1.4818e-04
Loss = 2.7265e-03, PNorm = 183.3985, GNorm = 0.1327, lr_0 = 1.4808e-04
Loss = 2.7314e-03, PNorm = 183.4011, GNorm = 0.2195, lr_0 = 1.4798e-04
Loss = 3.5582e-03, PNorm = 183.4019, GNorm = 0.1487, lr_0 = 1.4788e-04
Loss = 2.4202e-03, PNorm = 183.4039, GNorm = 0.1505, lr_0 = 1.4778e-04
Loss = 3.7048e-03, PNorm = 183.4072, GNorm = 0.1286, lr_0 = 1.4768e-04
Loss = 6.3504e-03, PNorm = 183.4093, GNorm = 0.1525, lr_0 = 1.4758e-04
Loss = 1.8877e-03, PNorm = 183.4111, GNorm = 0.0694, lr_0 = 1.4748e-04
Loss = 4.6187e-03, PNorm = 183.4130, GNorm = 0.0822, lr_0 = 1.4737e-04
Loss = 5.8412e-03, PNorm = 183.4141, GNorm = 0.0472, lr_0 = 1.4727e-04
Loss = 3.0392e-03, PNorm = 183.4161, GNorm = 0.1981, lr_0 = 1.4717e-04
Loss = 7.2257e-03, PNorm = 183.4187, GNorm = 0.1679, lr_0 = 1.4707e-04
Loss = 2.6435e-03, PNorm = 183.4207, GNorm = 0.1620, lr_0 = 1.4697e-04
Loss = 6.6768e-03, PNorm = 183.4243, GNorm = 0.1577, lr_0 = 1.4687e-04
Loss = 7.7171e-03, PNorm = 183.4279, GNorm = 0.0863, lr_0 = 1.4677e-04
Loss = 3.7376e-03, PNorm = 183.4302, GNorm = 0.0929, lr_0 = 1.4667e-04
Loss = 1.3983e-02, PNorm = 183.4339, GNorm = 0.3250, lr_0 = 1.4657e-04
Loss = 2.2026e-03, PNorm = 183.4356, GNorm = 0.0899, lr_0 = 1.4647e-04
Loss = 2.4548e-03, PNorm = 183.4379, GNorm = 0.2966, lr_0 = 1.4637e-04
Loss = 2.5808e-03, PNorm = 183.4398, GNorm = 0.2042, lr_0 = 1.4627e-04
Loss = 3.7781e-03, PNorm = 183.4410, GNorm = 0.0803, lr_0 = 1.4617e-04
Loss = 2.8585e-03, PNorm = 183.4428, GNorm = 0.0974, lr_0 = 1.4607e-04
Loss = 1.7922e-03, PNorm = 183.4451, GNorm = 0.2267, lr_0 = 1.4597e-04
Loss = 2.0603e-03, PNorm = 183.4470, GNorm = 0.1988, lr_0 = 1.4587e-04
Loss = 1.6294e-03, PNorm = 183.4483, GNorm = 0.0839, lr_0 = 1.4577e-04
Loss = 6.3825e-03, PNorm = 183.4514, GNorm = 0.2164, lr_0 = 1.4567e-04
Loss = 4.3888e-03, PNorm = 183.4539, GNorm = 0.1194, lr_0 = 1.4557e-04
Loss = 4.0874e-03, PNorm = 183.4553, GNorm = 0.0609, lr_0 = 1.4547e-04
Loss = 1.8964e-03, PNorm = 183.4571, GNorm = 0.0736, lr_0 = 1.4537e-04
Loss = 3.8691e-03, PNorm = 183.4600, GNorm = 0.0653, lr_0 = 1.4527e-04
Loss = 2.3819e-03, PNorm = 183.4637, GNorm = 0.0662, lr_0 = 1.4517e-04
Loss = 6.1040e-03, PNorm = 183.4666, GNorm = 0.1561, lr_0 = 1.4507e-04
Loss = 2.4021e-03, PNorm = 183.4692, GNorm = 0.0680, lr_0 = 1.4497e-04
Loss = 3.9248e-03, PNorm = 183.4709, GNorm = 0.1468, lr_0 = 1.4487e-04
Loss = 2.3364e-03, PNorm = 183.4730, GNorm = 0.1298, lr_0 = 1.4477e-04
Loss = 2.2152e-03, PNorm = 183.4751, GNorm = 0.1068, lr_0 = 1.4467e-04
Loss = 3.8640e-03, PNorm = 183.4783, GNorm = 0.2596, lr_0 = 1.4457e-04
Loss = 3.3908e-03, PNorm = 183.4797, GNorm = 0.0752, lr_0 = 1.4447e-04
Loss = 2.1237e-03, PNorm = 183.4817, GNorm = 0.0955, lr_0 = 1.4438e-04
Loss = 1.8256e-03, PNorm = 183.4842, GNorm = 0.1041, lr_0 = 1.4428e-04
Loss = 5.9378e-03, PNorm = 183.4867, GNorm = 0.0850, lr_0 = 1.4418e-04
Loss = 3.6705e-03, PNorm = 183.4891, GNorm = 0.1380, lr_0 = 1.4408e-04
Loss = 2.7268e-03, PNorm = 183.4918, GNorm = 0.0731, lr_0 = 1.4398e-04
Loss = 1.8594e-03, PNorm = 183.4938, GNorm = 0.1642, lr_0 = 1.4388e-04
Loss = 3.8227e-03, PNorm = 183.4966, GNorm = 0.0971, lr_0 = 1.4378e-04
Loss = 2.9708e-03, PNorm = 183.4985, GNorm = 0.2896, lr_0 = 1.4368e-04
Loss = 2.0789e-03, PNorm = 183.4992, GNorm = 0.1647, lr_0 = 1.4359e-04
Loss = 5.1550e-03, PNorm = 183.5010, GNorm = 0.3142, lr_0 = 1.4349e-04
Loss = 3.2854e-03, PNorm = 183.5035, GNorm = 0.1619, lr_0 = 1.4339e-04
Loss = 3.4850e-03, PNorm = 183.5049, GNorm = 0.0693, lr_0 = 1.4329e-04
Loss = 8.1804e-03, PNorm = 183.5085, GNorm = 0.2459, lr_0 = 1.4319e-04
Loss = 3.2369e-03, PNorm = 183.5108, GNorm = 0.1966, lr_0 = 1.4310e-04
Loss = 1.8600e-03, PNorm = 183.5119, GNorm = 0.0562, lr_0 = 1.4300e-04
Loss = 3.4711e-03, PNorm = 183.5146, GNorm = 0.5122, lr_0 = 1.4290e-04
Loss = 3.3675e-03, PNorm = 183.5161, GNorm = 0.0884, lr_0 = 1.4280e-04
Loss = 2.4817e-03, PNorm = 183.5181, GNorm = 0.0962, lr_0 = 1.4270e-04
Loss = 5.0802e-03, PNorm = 183.5209, GNorm = 0.1101, lr_0 = 1.4261e-04
Loss = 3.0565e-03, PNorm = 183.5230, GNorm = 0.1453, lr_0 = 1.4251e-04
Loss = 2.6634e-03, PNorm = 183.5246, GNorm = 0.1034, lr_0 = 1.4241e-04
Loss = 2.3263e-03, PNorm = 183.5268, GNorm = 0.4197, lr_0 = 1.4231e-04
Loss = 2.8832e-03, PNorm = 183.5284, GNorm = 0.1025, lr_0 = 1.4222e-04
Loss = 4.7371e-03, PNorm = 183.5295, GNorm = 0.1691, lr_0 = 1.4212e-04
Loss = 8.6187e-03, PNorm = 183.5325, GNorm = 0.2375, lr_0 = 1.4202e-04
Loss = 7.0134e-03, PNorm = 183.5349, GNorm = 0.1362, lr_0 = 1.4192e-04
Loss = 2.2728e-03, PNorm = 183.5367, GNorm = 0.0547, lr_0 = 1.4183e-04
Loss = 3.5508e-03, PNorm = 183.5379, GNorm = 0.0564, lr_0 = 1.4173e-04
Loss = 4.2760e-03, PNorm = 183.5396, GNorm = 0.1089, lr_0 = 1.4163e-04
Loss = 3.1789e-03, PNorm = 183.5419, GNorm = 0.1081, lr_0 = 1.4153e-04
Loss = 3.9148e-03, PNorm = 183.5456, GNorm = 0.0628, lr_0 = 1.4144e-04
Loss = 3.2672e-03, PNorm = 183.5474, GNorm = 0.0550, lr_0 = 1.4134e-04
Loss = 7.7448e-03, PNorm = 183.5478, GNorm = 0.6693, lr_0 = 1.4124e-04
Loss = 3.2454e-03, PNorm = 183.5488, GNorm = 0.0809, lr_0 = 1.4115e-04
Loss = 5.4837e-03, PNorm = 183.5528, GNorm = 0.5202, lr_0 = 1.4105e-04
Loss = 2.9038e-03, PNorm = 183.5553, GNorm = 0.0709, lr_0 = 1.4095e-04
Loss = 2.1309e-03, PNorm = 183.5564, GNorm = 0.0673, lr_0 = 1.4086e-04
Loss = 3.1974e-03, PNorm = 183.5575, GNorm = 0.2308, lr_0 = 1.4076e-04
Loss = 4.5297e-03, PNorm = 183.5603, GNorm = 0.0764, lr_0 = 1.4066e-04
Loss = 6.1684e-03, PNorm = 183.5616, GNorm = 0.0970, lr_0 = 1.4057e-04
Loss = 7.3757e-03, PNorm = 183.5641, GNorm = 0.1208, lr_0 = 1.4047e-04
Loss = 4.4858e-03, PNorm = 183.5673, GNorm = 0.1182, lr_0 = 1.4038e-04
Loss = 5.5871e-03, PNorm = 183.5717, GNorm = 0.6368, lr_0 = 1.4028e-04
Loss = 6.0038e-03, PNorm = 183.5743, GNorm = 0.3168, lr_0 = 1.4018e-04
Loss = 3.9347e-03, PNorm = 183.5777, GNorm = 0.5575, lr_0 = 1.4009e-04
Loss = 5.9482e-03, PNorm = 183.5822, GNorm = 0.4795, lr_0 = 1.3999e-04
Loss = 2.1138e-03, PNorm = 183.5838, GNorm = 0.1100, lr_0 = 1.3990e-04
Loss = 3.2290e-03, PNorm = 183.5868, GNorm = 0.1085, lr_0 = 1.3980e-04
Loss = 3.2734e-03, PNorm = 183.5902, GNorm = 0.1053, lr_0 = 1.3970e-04
Loss = 2.7597e-03, PNorm = 183.5938, GNorm = 0.0539, lr_0 = 1.3961e-04
Loss = 7.2646e-03, PNorm = 183.5973, GNorm = 0.0882, lr_0 = 1.3951e-04
Loss = 2.9402e-03, PNorm = 183.5987, GNorm = 0.3090, lr_0 = 1.3942e-04
Loss = 2.9816e-03, PNorm = 183.6003, GNorm = 0.2209, lr_0 = 1.3932e-04
Loss = 4.1988e-03, PNorm = 183.6027, GNorm = 0.2674, lr_0 = 1.3923e-04
Loss = 4.2845e-03, PNorm = 183.6042, GNorm = 0.1793, lr_0 = 1.3913e-04
Loss = 2.0093e-03, PNorm = 183.6059, GNorm = 0.1495, lr_0 = 1.3904e-04
Loss = 3.1660e-03, PNorm = 183.6091, GNorm = 0.1196, lr_0 = 1.3894e-04
Validation mae = 0.120896
Epoch 26
Loss = 9.6854e-03, PNorm = 183.6112, GNorm = 0.1357, lr_0 = 1.3884e-04
Loss = 1.9347e-03, PNorm = 183.6130, GNorm = 0.2043, lr_0 = 1.3875e-04
Loss = 3.2857e-03, PNorm = 183.6150, GNorm = 0.1676, lr_0 = 1.3865e-04
Loss = 2.7188e-03, PNorm = 183.6174, GNorm = 0.1131, lr_0 = 1.3856e-04
Loss = 3.7314e-03, PNorm = 183.6191, GNorm = 0.0683, lr_0 = 1.3846e-04
Loss = 3.1150e-03, PNorm = 183.6204, GNorm = 0.0819, lr_0 = 1.3837e-04
Loss = 3.1910e-03, PNorm = 183.6216, GNorm = 0.0972, lr_0 = 1.3828e-04
Loss = 3.7201e-03, PNorm = 183.6212, GNorm = 0.0737, lr_0 = 1.3818e-04
Loss = 2.3778e-03, PNorm = 183.6226, GNorm = 0.0893, lr_0 = 1.3809e-04
Loss = 3.3870e-03, PNorm = 183.6246, GNorm = 0.1330, lr_0 = 1.3799e-04
Loss = 3.5949e-03, PNorm = 183.6264, GNorm = 0.1140, lr_0 = 1.3790e-04
Loss = 3.3873e-03, PNorm = 183.6279, GNorm = 0.0741, lr_0 = 1.3780e-04
Loss = 1.4914e-03, PNorm = 183.6296, GNorm = 0.0969, lr_0 = 1.3771e-04
Loss = 2.6948e-03, PNorm = 183.6325, GNorm = 0.0385, lr_0 = 1.3761e-04
Loss = 6.3946e-03, PNorm = 183.6343, GNorm = 0.0949, lr_0 = 1.3752e-04
Loss = 2.2791e-03, PNorm = 183.6358, GNorm = 0.1158, lr_0 = 1.3742e-04
Loss = 6.4136e-03, PNorm = 183.6361, GNorm = 1.2519, lr_0 = 1.3733e-04
Loss = 2.6860e-03, PNorm = 183.6365, GNorm = 0.1666, lr_0 = 1.3724e-04
Loss = 2.3492e-03, PNorm = 183.6389, GNorm = 0.1927, lr_0 = 1.3714e-04
Loss = 5.3313e-03, PNorm = 183.6398, GNorm = 0.1009, lr_0 = 1.3705e-04
Loss = 6.7335e-03, PNorm = 183.6406, GNorm = 0.4114, lr_0 = 1.3695e-04
Loss = 1.7836e-03, PNorm = 183.6421, GNorm = 0.0751, lr_0 = 1.3686e-04
Loss = 2.8709e-03, PNorm = 183.6444, GNorm = 0.0666, lr_0 = 1.3677e-04
Loss = 4.2876e-03, PNorm = 183.6464, GNorm = 0.0907, lr_0 = 1.3667e-04
Loss = 2.6502e-03, PNorm = 183.6478, GNorm = 0.0857, lr_0 = 1.3658e-04
Loss = 2.0991e-03, PNorm = 183.6485, GNorm = 0.1541, lr_0 = 1.3649e-04
Loss = 2.8008e-03, PNorm = 183.6495, GNorm = 0.1298, lr_0 = 1.3639e-04
Loss = 2.5546e-03, PNorm = 183.6516, GNorm = 0.1612, lr_0 = 1.3630e-04
Loss = 1.5708e-03, PNorm = 183.6532, GNorm = 0.1067, lr_0 = 1.3621e-04
Loss = 1.9365e-03, PNorm = 183.6547, GNorm = 0.0982, lr_0 = 1.3611e-04
Loss = 1.8759e-03, PNorm = 183.6569, GNorm = 0.1004, lr_0 = 1.3602e-04
Loss = 5.0758e-03, PNorm = 183.6585, GNorm = 0.0668, lr_0 = 1.3593e-04
Loss = 1.7670e-03, PNorm = 183.6607, GNorm = 0.0918, lr_0 = 1.3583e-04
Loss = 2.6534e-03, PNorm = 183.6619, GNorm = 0.1166, lr_0 = 1.3574e-04
Loss = 1.7053e-03, PNorm = 183.6625, GNorm = 0.0538, lr_0 = 1.3565e-04
Loss = 1.5620e-03, PNorm = 183.6645, GNorm = 0.0889, lr_0 = 1.3555e-04
Loss = 1.7250e-03, PNorm = 183.6657, GNorm = 0.0589, lr_0 = 1.3546e-04
Loss = 2.4035e-03, PNorm = 183.6666, GNorm = 0.1331, lr_0 = 1.3537e-04
Loss = 3.5940e-03, PNorm = 183.6672, GNorm = 0.1112, lr_0 = 1.3528e-04
Loss = 1.8702e-03, PNorm = 183.6674, GNorm = 0.1032, lr_0 = 1.3518e-04
Loss = 6.3091e-03, PNorm = 183.6681, GNorm = 0.7559, lr_0 = 1.3509e-04
Loss = 1.9690e-03, PNorm = 183.6697, GNorm = 0.0839, lr_0 = 1.3500e-04
Loss = 1.5221e-03, PNorm = 183.6713, GNorm = 0.1322, lr_0 = 1.3491e-04
Loss = 6.2340e-03, PNorm = 183.6731, GNorm = 0.0733, lr_0 = 1.3481e-04
Loss = 4.3476e-03, PNorm = 183.6748, GNorm = 0.0979, lr_0 = 1.3472e-04
Loss = 3.3177e-03, PNorm = 183.6773, GNorm = 0.1261, lr_0 = 1.3463e-04
Loss = 5.1523e-03, PNorm = 183.6796, GNorm = 0.1530, lr_0 = 1.3454e-04
Loss = 2.4737e-03, PNorm = 183.6807, GNorm = 0.1176, lr_0 = 1.3444e-04
Loss = 2.5040e-03, PNorm = 183.6824, GNorm = 0.0802, lr_0 = 1.3435e-04
Loss = 3.6725e-03, PNorm = 183.6843, GNorm = 0.3886, lr_0 = 1.3426e-04
Loss = 5.5018e-03, PNorm = 183.6852, GNorm = 0.0647, lr_0 = 1.3417e-04
Loss = 3.0780e-03, PNorm = 183.6876, GNorm = 0.1369, lr_0 = 1.3408e-04
Loss = 2.7168e-03, PNorm = 183.6898, GNorm = 0.1033, lr_0 = 1.3398e-04
Loss = 7.1162e-03, PNorm = 183.6913, GNorm = 0.3530, lr_0 = 1.3389e-04
Loss = 5.0403e-03, PNorm = 183.6927, GNorm = 0.2167, lr_0 = 1.3380e-04
Loss = 7.9653e-03, PNorm = 183.6946, GNorm = 0.0774, lr_0 = 1.3371e-04
Loss = 2.6033e-03, PNorm = 183.6959, GNorm = 0.2231, lr_0 = 1.3362e-04
Loss = 3.0886e-03, PNorm = 183.6987, GNorm = 0.0574, lr_0 = 1.3353e-04
Loss = 1.9506e-03, PNorm = 183.7011, GNorm = 0.1613, lr_0 = 1.3343e-04
Loss = 2.2452e-03, PNorm = 183.7041, GNorm = 0.1623, lr_0 = 1.3334e-04
Loss = 1.5870e-03, PNorm = 183.7068, GNorm = 0.2018, lr_0 = 1.3325e-04
Loss = 2.9487e-03, PNorm = 183.7095, GNorm = 0.3685, lr_0 = 1.3316e-04
Loss = 1.5567e-03, PNorm = 183.7124, GNorm = 0.0501, lr_0 = 1.3307e-04
Loss = 1.8256e-02, PNorm = 183.7159, GNorm = 3.2720, lr_0 = 1.3298e-04
Loss = 2.3287e-03, PNorm = 183.7183, GNorm = 0.2896, lr_0 = 1.3289e-04
Loss = 4.3934e-03, PNorm = 183.7197, GNorm = 0.3237, lr_0 = 1.3280e-04
Loss = 6.6810e-03, PNorm = 183.7232, GNorm = 0.1539, lr_0 = 1.3270e-04
Loss = 5.4368e-03, PNorm = 183.7251, GNorm = 0.1024, lr_0 = 1.3261e-04
Loss = 3.3488e-03, PNorm = 183.7268, GNorm = 0.0741, lr_0 = 1.3252e-04
Loss = 4.1940e-03, PNorm = 183.7290, GNorm = 0.0741, lr_0 = 1.3243e-04
Loss = 3.2690e-03, PNorm = 183.7307, GNorm = 0.2050, lr_0 = 1.3234e-04
Loss = 4.4075e-03, PNorm = 183.7312, GNorm = 0.0987, lr_0 = 1.3225e-04
Loss = 1.8896e-03, PNorm = 183.7328, GNorm = 0.0963, lr_0 = 1.3216e-04
Loss = 2.2199e-03, PNorm = 183.7339, GNorm = 0.1613, lr_0 = 1.3207e-04
Loss = 3.9976e-03, PNorm = 183.7370, GNorm = 0.1177, lr_0 = 1.3198e-04
Loss = 5.5935e-03, PNorm = 183.7409, GNorm = 0.1930, lr_0 = 1.3189e-04
Loss = 3.1672e-03, PNorm = 183.7432, GNorm = 0.2003, lr_0 = 1.3180e-04
Loss = 6.6796e-03, PNorm = 183.7453, GNorm = 0.0898, lr_0 = 1.3171e-04
Loss = 1.4202e-03, PNorm = 183.7475, GNorm = 0.0870, lr_0 = 1.3162e-04
Loss = 1.9359e-03, PNorm = 183.7500, GNorm = 0.1370, lr_0 = 1.3153e-04
Loss = 2.3550e-03, PNorm = 183.7500, GNorm = 0.1484, lr_0 = 1.3144e-04
Loss = 2.2380e-03, PNorm = 183.7521, GNorm = 0.0859, lr_0 = 1.3135e-04
Loss = 2.5436e-03, PNorm = 183.7539, GNorm = 0.0913, lr_0 = 1.3126e-04
Loss = 2.6017e-03, PNorm = 183.7551, GNorm = 0.1792, lr_0 = 1.3117e-04
Loss = 4.9606e-03, PNorm = 183.7576, GNorm = 0.2141, lr_0 = 1.3108e-04
Loss = 4.7737e-03, PNorm = 183.7600, GNorm = 0.0906, lr_0 = 1.3099e-04
Loss = 7.1697e-03, PNorm = 183.7632, GNorm = 0.1102, lr_0 = 1.3090e-04
Loss = 1.7414e-03, PNorm = 183.7655, GNorm = 0.0769, lr_0 = 1.3081e-04
Loss = 3.5320e-03, PNorm = 183.7676, GNorm = 0.0902, lr_0 = 1.3072e-04
Loss = 1.5577e-03, PNorm = 183.7696, GNorm = 0.1442, lr_0 = 1.3063e-04
Loss = 4.2211e-03, PNorm = 183.7709, GNorm = 0.0888, lr_0 = 1.3054e-04
Loss = 2.2252e-03, PNorm = 183.7734, GNorm = 0.1540, lr_0 = 1.3045e-04
Loss = 5.5201e-03, PNorm = 183.7740, GNorm = 0.0823, lr_0 = 1.3036e-04
Loss = 4.2497e-03, PNorm = 183.7745, GNorm = 0.0452, lr_0 = 1.3027e-04
Loss = 4.2286e-03, PNorm = 183.7761, GNorm = 0.1097, lr_0 = 1.3018e-04
Loss = 2.3330e-03, PNorm = 183.7780, GNorm = 0.0604, lr_0 = 1.3009e-04
Loss = 1.4681e-03, PNorm = 183.7807, GNorm = 0.1237, lr_0 = 1.3000e-04
Loss = 5.8983e-03, PNorm = 183.7846, GNorm = 0.0698, lr_0 = 1.2992e-04
Loss = 6.0743e-03, PNorm = 183.7864, GNorm = 0.2964, lr_0 = 1.2983e-04
Loss = 2.3693e-03, PNorm = 183.7874, GNorm = 0.1839, lr_0 = 1.2974e-04
Loss = 7.0213e-03, PNorm = 183.7910, GNorm = 0.1123, lr_0 = 1.2965e-04
Loss = 2.3051e-03, PNorm = 183.7928, GNorm = 0.0561, lr_0 = 1.2956e-04
Loss = 3.9799e-03, PNorm = 183.7954, GNorm = 0.1075, lr_0 = 1.2947e-04
Loss = 4.1814e-03, PNorm = 183.7974, GNorm = 0.5533, lr_0 = 1.2938e-04
Loss = 6.5631e-03, PNorm = 183.7995, GNorm = 0.0611, lr_0 = 1.2929e-04
Loss = 1.4863e-03, PNorm = 183.8013, GNorm = 0.1540, lr_0 = 1.2921e-04
Loss = 1.4673e-03, PNorm = 183.8026, GNorm = 0.1307, lr_0 = 1.2912e-04
Loss = 2.2515e-03, PNorm = 183.8052, GNorm = 0.0829, lr_0 = 1.2903e-04
Loss = 1.4150e-03, PNorm = 183.8084, GNorm = 0.0665, lr_0 = 1.2894e-04
Loss = 7.3212e-03, PNorm = 183.8116, GNorm = 0.1276, lr_0 = 1.2885e-04
Loss = 1.4901e-03, PNorm = 183.8140, GNorm = 0.1518, lr_0 = 1.2876e-04
Loss = 3.8053e-03, PNorm = 183.8158, GNorm = 0.0772, lr_0 = 1.2867e-04
Loss = 2.5394e-03, PNorm = 183.8166, GNorm = 0.2051, lr_0 = 1.2859e-04
Loss = 3.4545e-03, PNorm = 183.8178, GNorm = 0.9853, lr_0 = 1.2850e-04
Loss = 1.5940e-03, PNorm = 183.8198, GNorm = 0.0667, lr_0 = 1.2841e-04
Loss = 2.6446e-03, PNorm = 183.8217, GNorm = 0.0670, lr_0 = 1.2832e-04
Loss = 1.6216e-03, PNorm = 183.8243, GNorm = 0.0967, lr_0 = 1.2823e-04
Loss = 6.0549e-03, PNorm = 183.8260, GNorm = 0.1953, lr_0 = 1.2815e-04
Loss = 2.5191e-03, PNorm = 183.8281, GNorm = 0.1301, lr_0 = 1.2806e-04
Loss = 3.8325e-03, PNorm = 183.8311, GNorm = 0.0980, lr_0 = 1.2797e-04
Validation mae = 0.120941
Epoch 27
Loss = 1.1605e-02, PNorm = 183.8335, GNorm = 0.1008, lr_0 = 1.2788e-04
Loss = 2.8314e-03, PNorm = 183.8353, GNorm = 0.4024, lr_0 = 1.2780e-04
Loss = 3.8326e-03, PNorm = 183.8353, GNorm = 0.0761, lr_0 = 1.2771e-04
Loss = 2.2241e-03, PNorm = 183.8359, GNorm = 0.0702, lr_0 = 1.2762e-04
Loss = 5.9659e-03, PNorm = 183.8364, GNorm = 0.2679, lr_0 = 1.2753e-04
Loss = 4.2330e-03, PNorm = 183.8373, GNorm = 0.1576, lr_0 = 1.2745e-04
Loss = 7.1082e-03, PNorm = 183.8381, GNorm = 0.0593, lr_0 = 1.2736e-04
Loss = 2.5606e-03, PNorm = 183.8391, GNorm = 0.0665, lr_0 = 1.2727e-04
Loss = 2.0329e-03, PNorm = 183.8403, GNorm = 0.1035, lr_0 = 1.2718e-04
Loss = 1.9609e-03, PNorm = 183.8423, GNorm = 0.1426, lr_0 = 1.2710e-04
Loss = 1.4533e-03, PNorm = 183.8435, GNorm = 0.0763, lr_0 = 1.2701e-04
Loss = 1.3789e-03, PNorm = 183.8445, GNorm = 0.0969, lr_0 = 1.2692e-04
Loss = 1.9180e-03, PNorm = 183.8467, GNorm = 0.0717, lr_0 = 1.2684e-04
Loss = 3.4637e-03, PNorm = 183.8485, GNorm = 0.0516, lr_0 = 1.2675e-04
Loss = 6.9357e-03, PNorm = 183.8498, GNorm = 0.0368, lr_0 = 1.2666e-04
Loss = 2.9498e-03, PNorm = 183.8514, GNorm = 0.1346, lr_0 = 1.2658e-04
Loss = 4.2976e-03, PNorm = 183.8517, GNorm = 0.0831, lr_0 = 1.2649e-04
Loss = 2.8524e-03, PNorm = 183.8526, GNorm = 0.0689, lr_0 = 1.2640e-04
Loss = 1.3095e-03, PNorm = 183.8544, GNorm = 0.0474, lr_0 = 1.2632e-04
Loss = 1.6895e-03, PNorm = 183.8559, GNorm = 0.1276, lr_0 = 1.2623e-04
Loss = 2.0167e-03, PNorm = 183.8562, GNorm = 0.1276, lr_0 = 1.2614e-04
Loss = 3.9449e-03, PNorm = 183.8572, GNorm = 0.0544, lr_0 = 1.2606e-04
Loss = 1.6042e-03, PNorm = 183.8586, GNorm = 0.0663, lr_0 = 1.2597e-04
Loss = 3.5798e-03, PNorm = 183.8597, GNorm = 0.0613, lr_0 = 1.2588e-04
Loss = 2.3164e-03, PNorm = 183.8607, GNorm = 0.0449, lr_0 = 1.2580e-04
Loss = 3.4751e-03, PNorm = 183.8623, GNorm = 0.1620, lr_0 = 1.2571e-04
Loss = 7.4131e-03, PNorm = 183.8643, GNorm = 0.1785, lr_0 = 1.2563e-04
Loss = 5.2771e-03, PNorm = 183.8648, GNorm = 0.1314, lr_0 = 1.2554e-04
Loss = 3.0910e-03, PNorm = 183.8659, GNorm = 0.1396, lr_0 = 1.2545e-04
Loss = 4.4972e-03, PNorm = 183.8671, GNorm = 0.1083, lr_0 = 1.2537e-04
Loss = 2.5665e-03, PNorm = 183.8676, GNorm = 0.0641, lr_0 = 1.2528e-04
Loss = 4.0553e-03, PNorm = 183.8699, GNorm = 0.1281, lr_0 = 1.2520e-04
Loss = 2.2316e-03, PNorm = 183.8717, GNorm = 0.1249, lr_0 = 1.2511e-04
Loss = 1.4253e-03, PNorm = 183.8740, GNorm = 0.1264, lr_0 = 1.2502e-04
Loss = 2.4283e-03, PNorm = 183.8757, GNorm = 0.1177, lr_0 = 1.2494e-04
Loss = 1.2423e-03, PNorm = 183.8770, GNorm = 0.1375, lr_0 = 1.2485e-04
Loss = 1.9272e-03, PNorm = 183.8780, GNorm = 0.0511, lr_0 = 1.2477e-04
Loss = 1.8666e-03, PNorm = 183.8792, GNorm = 0.0861, lr_0 = 1.2468e-04
Loss = 2.4846e-03, PNorm = 183.8800, GNorm = 0.0525, lr_0 = 1.2460e-04
Loss = 2.9890e-03, PNorm = 183.8810, GNorm = 0.1036, lr_0 = 1.2451e-04
Loss = 2.6278e-03, PNorm = 183.8839, GNorm = 0.0517, lr_0 = 1.2443e-04
Loss = 1.8620e-03, PNorm = 183.8865, GNorm = 0.0586, lr_0 = 1.2434e-04
Loss = 1.6782e-03, PNorm = 183.8890, GNorm = 0.0873, lr_0 = 1.2426e-04
Loss = 4.9754e-03, PNorm = 183.8909, GNorm = 0.6532, lr_0 = 1.2417e-04
Loss = 2.5413e-03, PNorm = 183.8923, GNorm = 0.0929, lr_0 = 1.2409e-04
Loss = 1.8401e-03, PNorm = 183.8932, GNorm = 0.2395, lr_0 = 1.2400e-04
Loss = 2.9295e-03, PNorm = 183.8956, GNorm = 0.0519, lr_0 = 1.2392e-04
Loss = 3.8838e-03, PNorm = 183.8973, GNorm = 0.0938, lr_0 = 1.2383e-04
Loss = 3.3881e-03, PNorm = 183.8975, GNorm = 0.0346, lr_0 = 1.2375e-04
Loss = 2.3022e-03, PNorm = 183.8988, GNorm = 0.0626, lr_0 = 1.2366e-04
Loss = 2.4017e-03, PNorm = 183.8997, GNorm = 0.0596, lr_0 = 1.2358e-04
Loss = 2.7968e-03, PNorm = 183.9011, GNorm = 0.3091, lr_0 = 1.2349e-04
Loss = 3.8703e-03, PNorm = 183.9028, GNorm = 0.0405, lr_0 = 1.2341e-04
Loss = 2.9999e-03, PNorm = 183.9035, GNorm = 0.1344, lr_0 = 1.2332e-04
Loss = 1.4157e-03, PNorm = 183.9047, GNorm = 0.1434, lr_0 = 1.2324e-04
Loss = 2.6151e-03, PNorm = 183.9060, GNorm = 0.0619, lr_0 = 1.2315e-04
Loss = 3.6867e-03, PNorm = 183.9085, GNorm = 0.0952, lr_0 = 1.2307e-04
Loss = 7.0959e-03, PNorm = 183.9112, GNorm = 0.1293, lr_0 = 1.2298e-04
Loss = 1.8037e-03, PNorm = 183.9125, GNorm = 0.1076, lr_0 = 1.2290e-04
Loss = 2.5812e-03, PNorm = 183.9150, GNorm = 0.0680, lr_0 = 1.2282e-04
Loss = 3.1070e-03, PNorm = 183.9181, GNorm = 0.1222, lr_0 = 1.2273e-04
Loss = 1.8291e-03, PNorm = 183.9207, GNorm = 0.0559, lr_0 = 1.2265e-04
Loss = 3.7171e-03, PNorm = 183.9217, GNorm = 0.2086, lr_0 = 1.2256e-04
Loss = 1.2995e-03, PNorm = 183.9228, GNorm = 0.1010, lr_0 = 1.2248e-04
Loss = 1.6532e-03, PNorm = 183.9241, GNorm = 0.1097, lr_0 = 1.2240e-04
Loss = 3.0224e-03, PNorm = 183.9261, GNorm = 0.1273, lr_0 = 1.2231e-04
Loss = 1.9219e-03, PNorm = 183.9282, GNorm = 0.2577, lr_0 = 1.2223e-04
Loss = 5.1748e-03, PNorm = 183.9283, GNorm = 0.1815, lr_0 = 1.2214e-04
Loss = 3.6320e-03, PNorm = 183.9319, GNorm = 0.0905, lr_0 = 1.2206e-04
Loss = 3.3866e-03, PNorm = 183.9342, GNorm = 0.0966, lr_0 = 1.2198e-04
Loss = 2.6503e-03, PNorm = 183.9345, GNorm = 0.1211, lr_0 = 1.2189e-04
Loss = 6.5326e-03, PNorm = 183.9362, GNorm = 0.1689, lr_0 = 1.2181e-04
Loss = 3.6370e-03, PNorm = 183.9396, GNorm = 0.5493, lr_0 = 1.2173e-04
Loss = 2.8809e-03, PNorm = 183.9409, GNorm = 0.0913, lr_0 = 1.2164e-04
Loss = 2.5074e-03, PNorm = 183.9410, GNorm = 0.1157, lr_0 = 1.2156e-04
Loss = 3.8750e-03, PNorm = 183.9424, GNorm = 0.0947, lr_0 = 1.2148e-04
Loss = 2.7644e-03, PNorm = 183.9441, GNorm = 0.1883, lr_0 = 1.2139e-04
Loss = 3.3980e-03, PNorm = 183.9448, GNorm = 0.0962, lr_0 = 1.2131e-04
Loss = 8.4033e-03, PNorm = 183.9477, GNorm = 0.0989, lr_0 = 1.2123e-04
Loss = 1.4696e-03, PNorm = 183.9499, GNorm = 0.0754, lr_0 = 1.2114e-04
Loss = 2.8326e-03, PNorm = 183.9509, GNorm = 0.0811, lr_0 = 1.2106e-04
Loss = 1.5389e-03, PNorm = 183.9533, GNorm = 0.0797, lr_0 = 1.2098e-04
Loss = 3.4506e-03, PNorm = 183.9555, GNorm = 0.0976, lr_0 = 1.2090e-04
Loss = 2.9031e-03, PNorm = 183.9575, GNorm = 0.1189, lr_0 = 1.2081e-04
Loss = 2.2503e-03, PNorm = 183.9585, GNorm = 0.0412, lr_0 = 1.2073e-04
Loss = 7.1326e-03, PNorm = 183.9605, GNorm = 0.0778, lr_0 = 1.2065e-04
Loss = 1.5292e-03, PNorm = 183.9625, GNorm = 0.0934, lr_0 = 1.2056e-04
Loss = 4.7454e-03, PNorm = 183.9645, GNorm = 0.0786, lr_0 = 1.2048e-04
Loss = 2.6716e-03, PNorm = 183.9660, GNorm = 0.0595, lr_0 = 1.2040e-04
Loss = 5.3140e-03, PNorm = 183.9680, GNorm = 0.0550, lr_0 = 1.2032e-04
Loss = 1.3967e-03, PNorm = 183.9703, GNorm = 0.0922, lr_0 = 1.2023e-04
Loss = 1.4316e-03, PNorm = 183.9724, GNorm = 0.0517, lr_0 = 1.2015e-04
Loss = 4.6413e-03, PNorm = 183.9737, GNorm = 0.1571, lr_0 = 1.2007e-04
Loss = 7.0640e-03, PNorm = 183.9746, GNorm = 0.0518, lr_0 = 1.1999e-04
Loss = 2.7787e-03, PNorm = 183.9764, GNorm = 0.2998, lr_0 = 1.1991e-04
Loss = 1.1227e-03, PNorm = 183.9775, GNorm = 0.0519, lr_0 = 1.1982e-04
Loss = 4.0517e-03, PNorm = 183.9804, GNorm = 0.0723, lr_0 = 1.1974e-04
Loss = 3.6718e-03, PNorm = 183.9810, GNorm = 0.0884, lr_0 = 1.1966e-04
Loss = 5.0683e-03, PNorm = 183.9825, GNorm = 0.1347, lr_0 = 1.1958e-04
Loss = 1.7352e-03, PNorm = 183.9833, GNorm = 0.0788, lr_0 = 1.1950e-04
Loss = 1.2326e-03, PNorm = 183.9848, GNorm = 0.0766, lr_0 = 1.1941e-04
Loss = 4.2490e-03, PNorm = 183.9869, GNorm = 0.1716, lr_0 = 1.1933e-04
Loss = 1.9187e-03, PNorm = 183.9875, GNorm = 0.1087, lr_0 = 1.1925e-04
Loss = 3.1271e-03, PNorm = 183.9887, GNorm = 0.1893, lr_0 = 1.1917e-04
Loss = 1.4129e-03, PNorm = 183.9894, GNorm = 0.3588, lr_0 = 1.1909e-04
Loss = 1.1999e-02, PNorm = 183.9901, GNorm = 0.4427, lr_0 = 1.1901e-04
Loss = 1.9672e-03, PNorm = 183.9909, GNorm = 0.0726, lr_0 = 1.1892e-04
Loss = 2.2634e-03, PNorm = 183.9920, GNorm = 0.0871, lr_0 = 1.1884e-04
Loss = 3.1396e-03, PNorm = 183.9941, GNorm = 0.0625, lr_0 = 1.1876e-04
Loss = 1.3790e-03, PNorm = 183.9950, GNorm = 0.0660, lr_0 = 1.1868e-04
Loss = 3.5743e-03, PNorm = 183.9970, GNorm = 0.0727, lr_0 = 1.1860e-04
Loss = 1.5135e-03, PNorm = 184.0001, GNorm = 0.1211, lr_0 = 1.1852e-04
Loss = 1.1230e-03, PNorm = 184.0024, GNorm = 0.0559, lr_0 = 1.1844e-04
Loss = 1.6074e-03, PNorm = 184.0047, GNorm = 0.1011, lr_0 = 1.1835e-04
Loss = 2.7641e-03, PNorm = 184.0069, GNorm = 0.1031, lr_0 = 1.1827e-04
Loss = 4.1000e-03, PNorm = 184.0101, GNorm = 0.0686, lr_0 = 1.1819e-04
Loss = 9.4660e-03, PNorm = 184.0114, GNorm = 0.0518, lr_0 = 1.1811e-04
Loss = 1.7371e-03, PNorm = 184.0117, GNorm = 0.4497, lr_0 = 1.1803e-04
Loss = 3.4451e-03, PNorm = 184.0123, GNorm = 0.1309, lr_0 = 1.1795e-04
Loss = 2.7823e-03, PNorm = 184.0123, GNorm = 0.0758, lr_0 = 1.1787e-04
Validation mae = 0.120747
Epoch 28
Loss = 1.8520e-03, PNorm = 184.0144, GNorm = 0.0573, lr_0 = 1.1779e-04
Loss = 1.8677e-03, PNorm = 184.0164, GNorm = 0.1516, lr_0 = 1.1771e-04
Loss = 6.2174e-03, PNorm = 184.0164, GNorm = 0.1484, lr_0 = 1.1763e-04
Loss = 1.7897e-03, PNorm = 184.0163, GNorm = 0.1249, lr_0 = 1.1755e-04
Loss = 1.2786e-03, PNorm = 184.0175, GNorm = 0.1141, lr_0 = 1.1747e-04
Loss = 6.6507e-03, PNorm = 184.0184, GNorm = 0.0675, lr_0 = 1.1739e-04
Loss = 2.3640e-03, PNorm = 184.0208, GNorm = 0.0652, lr_0 = 1.1730e-04
Loss = 1.6674e-03, PNorm = 184.0228, GNorm = 0.0989, lr_0 = 1.1722e-04
Loss = 4.7699e-03, PNorm = 184.0244, GNorm = 0.0316, lr_0 = 1.1714e-04
Loss = 2.0691e-03, PNorm = 184.0250, GNorm = 0.1716, lr_0 = 1.1706e-04
Loss = 1.6074e-03, PNorm = 184.0263, GNorm = 0.1652, lr_0 = 1.1698e-04
Loss = 1.5812e-03, PNorm = 184.0287, GNorm = 0.0511, lr_0 = 1.1690e-04
Loss = 2.9033e-03, PNorm = 184.0297, GNorm = 0.1159, lr_0 = 1.1682e-04
Loss = 7.3870e-03, PNorm = 184.0309, GNorm = 0.1850, lr_0 = 1.1674e-04
Loss = 1.0539e-03, PNorm = 184.0318, GNorm = 0.0552, lr_0 = 1.1666e-04
Loss = 2.2278e-03, PNorm = 184.0329, GNorm = 0.0729, lr_0 = 1.1658e-04
Loss = 1.8106e-03, PNorm = 184.0340, GNorm = 0.1346, lr_0 = 1.1650e-04
Loss = 1.2055e-03, PNorm = 184.0344, GNorm = 0.0677, lr_0 = 1.1642e-04
Loss = 2.0587e-03, PNorm = 184.0357, GNorm = 0.0840, lr_0 = 1.1634e-04
Loss = 1.9297e-03, PNorm = 184.0360, GNorm = 0.1254, lr_0 = 1.1626e-04
Loss = 2.4443e-03, PNorm = 184.0362, GNorm = 0.0855, lr_0 = 1.1618e-04
Loss = 1.7933e-03, PNorm = 184.0383, GNorm = 0.0702, lr_0 = 1.1611e-04
Loss = 5.4856e-03, PNorm = 184.0404, GNorm = 0.0903, lr_0 = 1.1603e-04
Loss = 4.2947e-03, PNorm = 184.0419, GNorm = 0.1834, lr_0 = 1.1595e-04
Loss = 8.3910e-04, PNorm = 184.0436, GNorm = 0.0415, lr_0 = 1.1587e-04
Loss = 2.7063e-03, PNorm = 184.0446, GNorm = 0.1214, lr_0 = 1.1579e-04
Loss = 2.3036e-03, PNorm = 184.0460, GNorm = 0.0864, lr_0 = 1.1571e-04
Loss = 1.5612e-03, PNorm = 184.0485, GNorm = 0.0583, lr_0 = 1.1563e-04
Loss = 7.5242e-03, PNorm = 184.0503, GNorm = 0.4849, lr_0 = 1.1555e-04
Loss = 1.5669e-03, PNorm = 184.0497, GNorm = 0.0861, lr_0 = 1.1547e-04
Loss = 1.2015e-03, PNorm = 184.0501, GNorm = 0.0909, lr_0 = 1.1539e-04
Loss = 9.8460e-04, PNorm = 184.0512, GNorm = 0.1178, lr_0 = 1.1531e-04
Loss = 5.4442e-03, PNorm = 184.0526, GNorm = 0.1504, lr_0 = 1.1523e-04
Loss = 9.8769e-04, PNorm = 184.0535, GNorm = 0.0654, lr_0 = 1.1515e-04
Loss = 3.8287e-03, PNorm = 184.0546, GNorm = 0.0950, lr_0 = 1.1508e-04
Loss = 1.5039e-03, PNorm = 184.0565, GNorm = 0.0750, lr_0 = 1.1500e-04
Loss = 1.6156e-03, PNorm = 184.0581, GNorm = 0.0642, lr_0 = 1.1492e-04
Loss = 1.3234e-03, PNorm = 184.0596, GNorm = 0.1268, lr_0 = 1.1484e-04
Loss = 3.5387e-03, PNorm = 184.0612, GNorm = 0.1348, lr_0 = 1.1476e-04
Loss = 3.9387e-03, PNorm = 184.0624, GNorm = 0.1710, lr_0 = 1.1468e-04
Loss = 1.3693e-03, PNorm = 184.0644, GNorm = 0.1340, lr_0 = 1.1460e-04
Loss = 2.6084e-03, PNorm = 184.0666, GNorm = 0.0919, lr_0 = 1.1452e-04
Loss = 2.4563e-03, PNorm = 184.0684, GNorm = 0.0792, lr_0 = 1.1445e-04
Loss = 3.3395e-03, PNorm = 184.0700, GNorm = 0.1546, lr_0 = 1.1437e-04
Loss = 2.9261e-03, PNorm = 184.0697, GNorm = 0.0764, lr_0 = 1.1429e-04
Loss = 1.9580e-03, PNorm = 184.0704, GNorm = 0.0825, lr_0 = 1.1421e-04
Loss = 9.7699e-04, PNorm = 184.0710, GNorm = 0.0437, lr_0 = 1.1413e-04
Loss = 4.2270e-03, PNorm = 184.0707, GNorm = 0.1311, lr_0 = 1.1405e-04
Loss = 3.5796e-03, PNorm = 184.0714, GNorm = 0.3742, lr_0 = 1.1398e-04
Loss = 3.1966e-03, PNorm = 184.0728, GNorm = 0.1511, lr_0 = 1.1390e-04
Loss = 3.7136e-03, PNorm = 184.0731, GNorm = 0.1738, lr_0 = 1.1382e-04
Loss = 3.2289e-03, PNorm = 184.0735, GNorm = 0.0847, lr_0 = 1.1374e-04
Loss = 9.6205e-04, PNorm = 184.0741, GNorm = 0.0354, lr_0 = 1.1366e-04
Loss = 2.9328e-03, PNorm = 184.0750, GNorm = 0.0759, lr_0 = 1.1359e-04
Loss = 1.2339e-03, PNorm = 184.0760, GNorm = 0.1092, lr_0 = 1.1351e-04
Loss = 2.0615e-03, PNorm = 184.0769, GNorm = 0.1686, lr_0 = 1.1343e-04
Loss = 3.0883e-03, PNorm = 184.0781, GNorm = 0.0935, lr_0 = 1.1335e-04
Loss = 1.7261e-03, PNorm = 184.0792, GNorm = 0.0860, lr_0 = 1.1328e-04
Loss = 4.5238e-03, PNorm = 184.0806, GNorm = 0.0628, lr_0 = 1.1320e-04
Loss = 1.2600e-03, PNorm = 184.0832, GNorm = 0.1461, lr_0 = 1.1312e-04
Loss = 5.9334e-03, PNorm = 184.0843, GNorm = 1.3576, lr_0 = 1.1304e-04
Loss = 1.4919e-03, PNorm = 184.0839, GNorm = 0.0609, lr_0 = 1.1297e-04
Loss = 4.6956e-03, PNorm = 184.0850, GNorm = 0.1437, lr_0 = 1.1289e-04
Loss = 4.3512e-03, PNorm = 184.0880, GNorm = 0.2824, lr_0 = 1.1281e-04
Loss = 1.8340e-03, PNorm = 184.0888, GNorm = 0.0627, lr_0 = 1.1273e-04
Loss = 3.0962e-03, PNorm = 184.0893, GNorm = 0.1414, lr_0 = 1.1266e-04
Loss = 3.2475e-03, PNorm = 184.0899, GNorm = 0.1446, lr_0 = 1.1258e-04
Loss = 1.4452e-03, PNorm = 184.0916, GNorm = 0.1385, lr_0 = 1.1250e-04
Loss = 1.2938e-03, PNorm = 184.0930, GNorm = 0.1154, lr_0 = 1.1243e-04
Loss = 1.9765e-03, PNorm = 184.0953, GNorm = 0.0430, lr_0 = 1.1235e-04
Loss = 4.1482e-03, PNorm = 184.0978, GNorm = 0.0778, lr_0 = 1.1227e-04
Loss = 1.1417e-03, PNorm = 184.0991, GNorm = 0.1176, lr_0 = 1.1219e-04
Loss = 2.6253e-03, PNorm = 184.1004, GNorm = 0.0392, lr_0 = 1.1212e-04
Loss = 2.3878e-03, PNorm = 184.1021, GNorm = 0.3986, lr_0 = 1.1204e-04
Loss = 5.5259e-03, PNorm = 184.1035, GNorm = 0.0581, lr_0 = 1.1196e-04
Loss = 1.1146e-02, PNorm = 184.1053, GNorm = 0.3669, lr_0 = 1.1189e-04
Loss = 2.2745e-03, PNorm = 184.1041, GNorm = 0.1078, lr_0 = 1.1181e-04
Loss = 6.0677e-03, PNorm = 184.1045, GNorm = 0.4313, lr_0 = 1.1173e-04
Loss = 1.7788e-03, PNorm = 184.1065, GNorm = 0.0826, lr_0 = 1.1166e-04
Loss = 1.2488e-03, PNorm = 184.1075, GNorm = 0.0757, lr_0 = 1.1158e-04
Loss = 5.8934e-03, PNorm = 184.1074, GNorm = 0.2112, lr_0 = 1.1150e-04
Loss = 1.3577e-03, PNorm = 184.1093, GNorm = 0.0750, lr_0 = 1.1143e-04
Loss = 2.5388e-03, PNorm = 184.1110, GNorm = 0.1267, lr_0 = 1.1135e-04
Loss = 3.5280e-03, PNorm = 184.1125, GNorm = 0.1141, lr_0 = 1.1128e-04
Loss = 8.0789e-03, PNorm = 184.1130, GNorm = 0.0620, lr_0 = 1.1120e-04
Loss = 3.8829e-03, PNorm = 184.1139, GNorm = 0.0858, lr_0 = 1.1112e-04
Loss = 4.0273e-03, PNorm = 184.1150, GNorm = 0.6734, lr_0 = 1.1105e-04
Loss = 3.1253e-03, PNorm = 184.1164, GNorm = 0.0445, lr_0 = 1.1097e-04
Loss = 1.4248e-03, PNorm = 184.1179, GNorm = 0.1556, lr_0 = 1.1089e-04
Loss = 3.5565e-03, PNorm = 184.1196, GNorm = 0.1952, lr_0 = 1.1082e-04
Loss = 1.9923e-03, PNorm = 184.1207, GNorm = 0.0451, lr_0 = 1.1074e-04
Loss = 1.4104e-03, PNorm = 184.1220, GNorm = 0.0919, lr_0 = 1.1067e-04
Loss = 2.6221e-03, PNorm = 184.1238, GNorm = 0.0792, lr_0 = 1.1059e-04
Loss = 6.8382e-03, PNorm = 184.1239, GNorm = 0.2253, lr_0 = 1.1052e-04
Loss = 3.0661e-03, PNorm = 184.1266, GNorm = 0.6774, lr_0 = 1.1044e-04
Loss = 1.8860e-03, PNorm = 184.1286, GNorm = 0.1488, lr_0 = 1.1036e-04
Loss = 3.7610e-03, PNorm = 184.1307, GNorm = 0.2633, lr_0 = 1.1029e-04
Loss = 4.1147e-03, PNorm = 184.1335, GNorm = 0.0382, lr_0 = 1.1021e-04
Loss = 1.5381e-03, PNorm = 184.1360, GNorm = 0.0863, lr_0 = 1.1014e-04
Loss = 7.6065e-03, PNorm = 184.1379, GNorm = 0.2255, lr_0 = 1.1006e-04
Loss = 7.1392e-03, PNorm = 184.1406, GNorm = 0.0818, lr_0 = 1.0999e-04
Loss = 1.0662e-03, PNorm = 184.1422, GNorm = 0.0751, lr_0 = 1.0991e-04
Loss = 2.0572e-03, PNorm = 184.1440, GNorm = 0.1795, lr_0 = 1.0984e-04
Loss = 2.9629e-03, PNorm = 184.1458, GNorm = 0.1178, lr_0 = 1.0976e-04
Loss = 5.1091e-03, PNorm = 184.1465, GNorm = 0.0817, lr_0 = 1.0969e-04
Loss = 5.5915e-03, PNorm = 184.1470, GNorm = 0.1153, lr_0 = 1.0961e-04
Loss = 3.7912e-03, PNorm = 184.1475, GNorm = 0.4097, lr_0 = 1.0954e-04
Loss = 4.0795e-03, PNorm = 184.1485, GNorm = 0.1193, lr_0 = 1.0946e-04
Loss = 1.0967e-03, PNorm = 184.1499, GNorm = 0.0711, lr_0 = 1.0939e-04
Loss = 3.7255e-03, PNorm = 184.1511, GNorm = 0.1087, lr_0 = 1.0931e-04
Loss = 1.8450e-03, PNorm = 184.1523, GNorm = 0.0406, lr_0 = 1.0924e-04
Loss = 1.2924e-03, PNorm = 184.1538, GNorm = 0.1130, lr_0 = 1.0916e-04
Loss = 7.5375e-03, PNorm = 184.1547, GNorm = 0.1282, lr_0 = 1.0909e-04
Loss = 1.3786e-03, PNorm = 184.1562, GNorm = 0.1208, lr_0 = 1.0901e-04
Loss = 2.9462e-03, PNorm = 184.1589, GNorm = 0.0753, lr_0 = 1.0894e-04
Loss = 2.4904e-03, PNorm = 184.1606, GNorm = 0.2114, lr_0 = 1.0886e-04
Loss = 1.5979e-03, PNorm = 184.1626, GNorm = 0.0844, lr_0 = 1.0879e-04
Loss = 2.4983e-03, PNorm = 184.1654, GNorm = 0.1282, lr_0 = 1.0871e-04
Loss = 9.2697e-03, PNorm = 184.1685, GNorm = 0.0666, lr_0 = 1.0864e-04
Loss = 1.3122e-03, PNorm = 184.1701, GNorm = 0.0667, lr_0 = 1.0856e-04
Validation mae = 0.120846
Epoch 29
Loss = 3.4986e-03, PNorm = 184.1709, GNorm = 0.1063, lr_0 = 1.0849e-04
Loss = 1.3083e-03, PNorm = 184.1724, GNorm = 0.1469, lr_0 = 1.0841e-04
Loss = 3.6138e-03, PNorm = 184.1734, GNorm = 0.1331, lr_0 = 1.0834e-04
Loss = 3.3371e-03, PNorm = 184.1742, GNorm = 0.0846, lr_0 = 1.0827e-04
Loss = 1.9333e-03, PNorm = 184.1748, GNorm = 0.0738, lr_0 = 1.0819e-04
Loss = 9.5360e-04, PNorm = 184.1755, GNorm = 0.1708, lr_0 = 1.0812e-04
Loss = 8.2306e-04, PNorm = 184.1762, GNorm = 0.0526, lr_0 = 1.0804e-04
Loss = 3.7646e-03, PNorm = 184.1772, GNorm = 0.0908, lr_0 = 1.0797e-04
Loss = 1.9972e-03, PNorm = 184.1786, GNorm = 0.1095, lr_0 = 1.0790e-04
Loss = 1.3658e-03, PNorm = 184.1798, GNorm = 0.0540, lr_0 = 1.0782e-04
Loss = 1.4176e-03, PNorm = 184.1820, GNorm = 0.1437, lr_0 = 1.0775e-04
Loss = 1.3329e-03, PNorm = 184.1827, GNorm = 0.1289, lr_0 = 1.0767e-04
Loss = 4.1604e-03, PNorm = 184.1837, GNorm = 0.2192, lr_0 = 1.0760e-04
Loss = 2.4187e-03, PNorm = 184.1859, GNorm = 0.0777, lr_0 = 1.0753e-04
Loss = 3.8979e-03, PNorm = 184.1881, GNorm = 0.1478, lr_0 = 1.0745e-04
Loss = 1.4203e-03, PNorm = 184.1897, GNorm = 0.0791, lr_0 = 1.0738e-04
Loss = 1.3414e-02, PNorm = 184.1930, GNorm = 0.0790, lr_0 = 1.0731e-04
Loss = 1.2102e-03, PNorm = 184.1939, GNorm = 0.1694, lr_0 = 1.0723e-04
Loss = 4.2669e-03, PNorm = 184.1942, GNorm = 0.1575, lr_0 = 1.0716e-04
Loss = 3.7872e-03, PNorm = 184.1964, GNorm = 0.0904, lr_0 = 1.0709e-04
Loss = 5.4986e-03, PNorm = 184.1980, GNorm = 0.0754, lr_0 = 1.0701e-04
Loss = 6.0549e-03, PNorm = 184.1970, GNorm = 0.1228, lr_0 = 1.0694e-04
Loss = 4.1450e-03, PNorm = 184.1975, GNorm = 0.0675, lr_0 = 1.0687e-04
Loss = 2.3801e-03, PNorm = 184.1997, GNorm = 0.1233, lr_0 = 1.0679e-04
Loss = 3.2530e-03, PNorm = 184.1996, GNorm = 0.1265, lr_0 = 1.0672e-04
Loss = 3.7779e-03, PNorm = 184.1999, GNorm = 0.0564, lr_0 = 1.0665e-04
Loss = 1.1532e-03, PNorm = 184.1999, GNorm = 0.1574, lr_0 = 1.0657e-04
Loss = 2.5655e-03, PNorm = 184.2010, GNorm = 0.0685, lr_0 = 1.0650e-04
Loss = 2.1049e-03, PNorm = 184.2018, GNorm = 0.1818, lr_0 = 1.0643e-04
Loss = 1.7167e-03, PNorm = 184.2025, GNorm = 0.1193, lr_0 = 1.0635e-04
Loss = 3.4739e-03, PNorm = 184.2036, GNorm = 0.0694, lr_0 = 1.0628e-04
Loss = 4.0615e-03, PNorm = 184.2045, GNorm = 0.1499, lr_0 = 1.0621e-04
Loss = 1.8190e-03, PNorm = 184.2063, GNorm = 0.0578, lr_0 = 1.0614e-04
Loss = 4.1751e-03, PNorm = 184.2078, GNorm = 0.0635, lr_0 = 1.0606e-04
Loss = 2.0156e-03, PNorm = 184.2089, GNorm = 0.0836, lr_0 = 1.0599e-04
Loss = 1.0879e-03, PNorm = 184.2094, GNorm = 0.0386, lr_0 = 1.0592e-04
Loss = 2.5711e-03, PNorm = 184.2105, GNorm = 0.2830, lr_0 = 1.0585e-04
Loss = 2.6432e-03, PNorm = 184.2117, GNorm = 0.0466, lr_0 = 1.0577e-04
Loss = 2.4980e-03, PNorm = 184.2126, GNorm = 0.1679, lr_0 = 1.0570e-04
Loss = 4.5203e-03, PNorm = 184.2144, GNorm = 0.0706, lr_0 = 1.0563e-04
Loss = 1.1123e-03, PNorm = 184.2162, GNorm = 0.0674, lr_0 = 1.0556e-04
Loss = 5.8926e-03, PNorm = 184.2176, GNorm = 0.1129, lr_0 = 1.0548e-04
Loss = 3.0501e-03, PNorm = 184.2189, GNorm = 0.0646, lr_0 = 1.0541e-04
Loss = 7.3236e-04, PNorm = 184.2195, GNorm = 0.0654, lr_0 = 1.0534e-04
Loss = 1.9290e-03, PNorm = 184.2196, GNorm = 0.1119, lr_0 = 1.0527e-04
Loss = 1.6932e-03, PNorm = 184.2201, GNorm = 0.0540, lr_0 = 1.0519e-04
Loss = 2.9486e-03, PNorm = 184.2209, GNorm = 0.0865, lr_0 = 1.0512e-04
Loss = 2.7724e-03, PNorm = 184.2218, GNorm = 0.0531, lr_0 = 1.0505e-04
Loss = 3.2309e-03, PNorm = 184.2223, GNorm = 0.0844, lr_0 = 1.0498e-04
Loss = 1.1397e-03, PNorm = 184.2230, GNorm = 0.0375, lr_0 = 1.0491e-04
Loss = 1.2586e-03, PNorm = 184.2251, GNorm = 0.1200, lr_0 = 1.0483e-04
Loss = 4.9231e-03, PNorm = 184.2259, GNorm = 0.0534, lr_0 = 1.0476e-04
Loss = 1.8785e-03, PNorm = 184.2265, GNorm = 0.0706, lr_0 = 1.0469e-04
Loss = 9.2893e-04, PNorm = 184.2276, GNorm = 0.1032, lr_0 = 1.0462e-04
Loss = 1.9889e-03, PNorm = 184.2291, GNorm = 0.0529, lr_0 = 1.0455e-04
Loss = 3.2663e-03, PNorm = 184.2306, GNorm = 0.0573, lr_0 = 1.0448e-04
Loss = 2.0503e-03, PNorm = 184.2325, GNorm = 0.1407, lr_0 = 1.0440e-04
Loss = 2.3688e-03, PNorm = 184.2338, GNorm = 0.0589, lr_0 = 1.0433e-04
Loss = 2.2325e-03, PNorm = 184.2355, GNorm = 0.0527, lr_0 = 1.0426e-04
Loss = 9.8521e-04, PNorm = 184.2370, GNorm = 0.0471, lr_0 = 1.0419e-04
Loss = 1.4116e-03, PNorm = 184.2374, GNorm = 0.1621, lr_0 = 1.0412e-04
Loss = 5.4283e-03, PNorm = 184.2381, GNorm = 0.0920, lr_0 = 1.0405e-04
Loss = 1.5825e-03, PNorm = 184.2385, GNorm = 0.0794, lr_0 = 1.0398e-04
Loss = 1.1060e-03, PNorm = 184.2389, GNorm = 0.0752, lr_0 = 1.0391e-04
Loss = 3.3859e-03, PNorm = 184.2403, GNorm = 0.0797, lr_0 = 1.0383e-04
Loss = 1.3085e-03, PNorm = 184.2425, GNorm = 0.0555, lr_0 = 1.0376e-04
Loss = 1.7911e-03, PNorm = 184.2444, GNorm = 0.1682, lr_0 = 1.0369e-04
Loss = 1.1319e-03, PNorm = 184.2456, GNorm = 0.0675, lr_0 = 1.0362e-04
Loss = 6.2756e-03, PNorm = 184.2466, GNorm = 0.0979, lr_0 = 1.0355e-04
Loss = 2.1824e-03, PNorm = 184.2486, GNorm = 0.1336, lr_0 = 1.0348e-04
Loss = 1.1726e-03, PNorm = 184.2500, GNorm = 0.0429, lr_0 = 1.0341e-04
Loss = 2.3736e-03, PNorm = 184.2517, GNorm = 0.0797, lr_0 = 1.0334e-04
Loss = 2.9357e-03, PNorm = 184.2537, GNorm = 0.0583, lr_0 = 1.0327e-04
Loss = 5.6181e-03, PNorm = 184.2548, GNorm = 0.0740, lr_0 = 1.0320e-04
Loss = 1.4683e-03, PNorm = 184.2554, GNorm = 0.0949, lr_0 = 1.0312e-04
Loss = 2.1947e-03, PNorm = 184.2556, GNorm = 0.2504, lr_0 = 1.0305e-04
Loss = 5.2478e-03, PNorm = 184.2572, GNorm = 0.0997, lr_0 = 1.0298e-04
Loss = 1.8425e-03, PNorm = 184.2582, GNorm = 0.0695, lr_0 = 1.0291e-04
Loss = 3.1395e-03, PNorm = 184.2595, GNorm = 0.0616, lr_0 = 1.0284e-04
Loss = 5.3240e-03, PNorm = 184.2608, GNorm = 0.0653, lr_0 = 1.0277e-04
Loss = 1.3881e-03, PNorm = 184.2624, GNorm = 0.1331, lr_0 = 1.0270e-04
Loss = 3.1343e-03, PNorm = 184.2620, GNorm = 0.1316, lr_0 = 1.0263e-04
Loss = 3.9479e-03, PNorm = 184.2626, GNorm = 0.2797, lr_0 = 1.0256e-04
Loss = 4.3097e-03, PNorm = 184.2646, GNorm = 0.1567, lr_0 = 1.0249e-04
Loss = 1.3450e-03, PNorm = 184.2657, GNorm = 0.0484, lr_0 = 1.0242e-04
Loss = 1.7730e-03, PNorm = 184.2671, GNorm = 0.1587, lr_0 = 1.0235e-04
Loss = 2.2174e-03, PNorm = 184.2680, GNorm = 0.0530, lr_0 = 1.0228e-04
Loss = 2.2383e-03, PNorm = 184.2687, GNorm = 0.0274, lr_0 = 1.0221e-04
Loss = 7.5344e-03, PNorm = 184.2706, GNorm = 1.0494, lr_0 = 1.0214e-04
Loss = 8.6848e-04, PNorm = 184.2723, GNorm = 0.0993, lr_0 = 1.0207e-04
Loss = 1.3916e-03, PNorm = 184.2729, GNorm = 0.0910, lr_0 = 1.0200e-04
Loss = 3.3053e-03, PNorm = 184.2737, GNorm = 0.0590, lr_0 = 1.0193e-04
Loss = 1.5526e-03, PNorm = 184.2746, GNorm = 0.0433, lr_0 = 1.0186e-04
Loss = 1.0457e-02, PNorm = 184.2750, GNorm = 0.1003, lr_0 = 1.0179e-04
Loss = 1.7783e-03, PNorm = 184.2764, GNorm = 0.0796, lr_0 = 1.0172e-04
Loss = 3.4730e-03, PNorm = 184.2778, GNorm = 0.2138, lr_0 = 1.0165e-04
Loss = 4.0170e-03, PNorm = 184.2790, GNorm = 0.0475, lr_0 = 1.0158e-04
Loss = 2.1038e-03, PNorm = 184.2805, GNorm = 0.0698, lr_0 = 1.0151e-04
Loss = 1.6476e-03, PNorm = 184.2812, GNorm = 0.1235, lr_0 = 1.0144e-04
Loss = 5.3547e-03, PNorm = 184.2819, GNorm = 0.0306, lr_0 = 1.0137e-04
Loss = 1.3894e-03, PNorm = 184.2834, GNorm = 0.0466, lr_0 = 1.0130e-04
Loss = 3.6948e-03, PNorm = 184.2845, GNorm = 0.1341, lr_0 = 1.0123e-04
Loss = 3.1764e-03, PNorm = 184.2869, GNorm = 0.0408, lr_0 = 1.0116e-04
Loss = 1.0583e-03, PNorm = 184.2887, GNorm = 0.0625, lr_0 = 1.0110e-04
Loss = 1.4939e-03, PNorm = 184.2893, GNorm = 0.0591, lr_0 = 1.0103e-04
Loss = 2.5068e-03, PNorm = 184.2905, GNorm = 0.0910, lr_0 = 1.0096e-04
Loss = 1.0576e-03, PNorm = 184.2915, GNorm = 0.1537, lr_0 = 1.0089e-04
Loss = 1.4300e-03, PNorm = 184.2929, GNorm = 0.0429, lr_0 = 1.0082e-04
Loss = 6.5618e-03, PNorm = 184.2947, GNorm = 0.0551, lr_0 = 1.0075e-04
Loss = 8.8445e-04, PNorm = 184.2968, GNorm = 0.0773, lr_0 = 1.0068e-04
Loss = 1.1719e-03, PNorm = 184.2984, GNorm = 0.0476, lr_0 = 1.0061e-04
Loss = 4.2985e-03, PNorm = 184.3003, GNorm = 0.0847, lr_0 = 1.0054e-04
Loss = 6.2901e-03, PNorm = 184.3009, GNorm = 0.0615, lr_0 = 1.0047e-04
Loss = 3.0017e-03, PNorm = 184.3017, GNorm = 0.0903, lr_0 = 1.0041e-04
Loss = 3.4623e-03, PNorm = 184.3022, GNorm = 0.1715, lr_0 = 1.0034e-04
Loss = 2.9870e-03, PNorm = 184.3038, GNorm = 0.0909, lr_0 = 1.0027e-04
Loss = 2.2909e-03, PNorm = 184.3052, GNorm = 0.2178, lr_0 = 1.0020e-04
Loss = 2.5742e-03, PNorm = 184.3060, GNorm = 0.1588, lr_0 = 1.0013e-04
Loss = 2.4241e-03, PNorm = 184.3069, GNorm = 0.0638, lr_0 = 1.0006e-04
Loss = 4.8389e-03, PNorm = 184.3084, GNorm = 0.0931, lr_0 = 1.0000e-04
Validation mae = 0.120657
Model 0 best validation mae = 0.120657 on epoch 29
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.119513
Ensemble test mae = 0.119513
Fold 2
Splitting data with seed 2
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN()
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=2048, out_features=2100, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=2100, out_features=2100, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.0, inplace=False)
    (7): Linear(in_features=2100, out_features=1, bias=True)
  )
)
Number of parameters = 8,717,101
Moving model to cuda
Epoch 0
Loss = 8.9146e-01, PNorm = 64.6276, GNorm = 1.7508, lr_0 = 1.0413e-04
Loss = 7.3991e-01, PNorm = 64.6378, GNorm = 2.5642, lr_0 = 1.0788e-04
Loss = 8.0100e-01, PNorm = 64.6482, GNorm = 2.5211, lr_0 = 1.1163e-04
Loss = 8.0165e-01, PNorm = 64.6591, GNorm = 3.3243, lr_0 = 1.1537e-04
Loss = 7.0145e-01, PNorm = 64.6697, GNorm = 2.5211, lr_0 = 1.1913e-04
Loss = 7.3158e-01, PNorm = 64.6798, GNorm = 2.1264, lr_0 = 1.2287e-04
Loss = 7.1044e-01, PNorm = 64.6895, GNorm = 3.0881, lr_0 = 1.2663e-04
Loss = 7.1622e-01, PNorm = 64.7008, GNorm = 2.3221, lr_0 = 1.3038e-04
Loss = 6.5496e-01, PNorm = 64.7119, GNorm = 2.5232, lr_0 = 1.3413e-04
Loss = 5.9577e-01, PNorm = 64.7214, GNorm = 1.9348, lr_0 = 1.3788e-04
Loss = 7.1941e-01, PNorm = 64.7318, GNorm = 2.8076, lr_0 = 1.4163e-04
Loss = 6.6988e-01, PNorm = 64.7436, GNorm = 2.5700, lr_0 = 1.4537e-04
Loss = 6.3277e-01, PNorm = 64.7563, GNorm = 1.9678, lr_0 = 1.4913e-04
Loss = 5.9209e-01, PNorm = 64.7684, GNorm = 2.5550, lr_0 = 1.5288e-04
Loss = 5.7527e-01, PNorm = 64.7802, GNorm = 2.4387, lr_0 = 1.5662e-04
Loss = 6.0729e-01, PNorm = 64.7882, GNorm = 2.1695, lr_0 = 1.6038e-04
Loss = 6.1077e-01, PNorm = 64.7986, GNorm = 3.7205, lr_0 = 1.6412e-04
Loss = 6.1524e-01, PNorm = 64.8114, GNorm = 2.5714, lr_0 = 1.6788e-04
Loss = 6.3316e-01, PNorm = 64.8248, GNorm = 2.0220, lr_0 = 1.7163e-04
Loss = 5.7723e-01, PNorm = 64.8357, GNorm = 3.1314, lr_0 = 1.7538e-04
Loss = 5.1880e-01, PNorm = 64.8474, GNorm = 2.8829, lr_0 = 1.7913e-04
Loss = 6.6383e-01, PNorm = 64.8618, GNorm = 3.1634, lr_0 = 1.8288e-04
Loss = 6.1637e-01, PNorm = 64.8764, GNorm = 2.9650, lr_0 = 1.8662e-04
Loss = 5.7127e-01, PNorm = 64.8899, GNorm = 1.7593, lr_0 = 1.9038e-04
Loss = 6.3246e-01, PNorm = 64.9037, GNorm = 2.1751, lr_0 = 1.9413e-04
Loss = 6.4811e-01, PNorm = 64.9193, GNorm = 1.9330, lr_0 = 1.9788e-04
Loss = 6.3965e-01, PNorm = 64.9346, GNorm = 2.4710, lr_0 = 2.0163e-04
Loss = 5.3356e-01, PNorm = 64.9532, GNorm = 2.7269, lr_0 = 2.0537e-04
Loss = 5.6945e-01, PNorm = 64.9702, GNorm = 1.9526, lr_0 = 2.0913e-04
Loss = 5.7094e-01, PNorm = 64.9884, GNorm = 2.1997, lr_0 = 2.1288e-04
Loss = 5.8007e-01, PNorm = 65.0088, GNorm = 1.8698, lr_0 = 2.1663e-04
Loss = 5.1650e-01, PNorm = 65.0255, GNorm = 2.1030, lr_0 = 2.2038e-04
Loss = 5.2840e-01, PNorm = 65.0439, GNorm = 2.4748, lr_0 = 2.2412e-04
Loss = 5.7977e-01, PNorm = 65.0645, GNorm = 2.8642, lr_0 = 2.2787e-04
Loss = 6.1212e-01, PNorm = 65.0881, GNorm = 1.6204, lr_0 = 2.3163e-04
Loss = 5.3596e-01, PNorm = 65.1082, GNorm = 2.0814, lr_0 = 2.3538e-04
Loss = 6.1006e-01, PNorm = 65.1263, GNorm = 2.1426, lr_0 = 2.3913e-04
Loss = 5.0834e-01, PNorm = 65.1472, GNorm = 1.6317, lr_0 = 2.4288e-04
Loss = 4.9716e-01, PNorm = 65.1673, GNorm = 1.7549, lr_0 = 2.4662e-04
Loss = 4.5519e-01, PNorm = 65.1865, GNorm = 1.6979, lr_0 = 2.5038e-04
Loss = 4.9774e-01, PNorm = 65.2081, GNorm = 1.6538, lr_0 = 2.5413e-04
Loss = 5.5842e-01, PNorm = 65.2281, GNorm = 1.7358, lr_0 = 2.5788e-04
Loss = 5.3902e-01, PNorm = 65.2501, GNorm = 1.7613, lr_0 = 2.6163e-04
Loss = 5.4037e-01, PNorm = 65.2728, GNorm = 2.9432, lr_0 = 2.6537e-04
Loss = 4.8283e-01, PNorm = 65.2942, GNorm = 2.5699, lr_0 = 2.6912e-04
Loss = 5.5407e-01, PNorm = 65.3181, GNorm = 1.5157, lr_0 = 2.7288e-04
Loss = 5.5516e-01, PNorm = 65.3429, GNorm = 1.5015, lr_0 = 2.7663e-04
Loss = 6.2728e-01, PNorm = 65.3692, GNorm = 1.9895, lr_0 = 2.8038e-04
Loss = 5.3942e-01, PNorm = 65.3959, GNorm = 2.0352, lr_0 = 2.8413e-04
Loss = 4.6642e-01, PNorm = 65.4221, GNorm = 1.5238, lr_0 = 2.8787e-04
Loss = 5.3739e-01, PNorm = 65.4521, GNorm = 1.9867, lr_0 = 2.9163e-04
Loss = 4.4917e-01, PNorm = 65.4793, GNorm = 1.1896, lr_0 = 2.9538e-04
Loss = 5.2176e-01, PNorm = 65.5073, GNorm = 1.3377, lr_0 = 2.9913e-04
Loss = 5.9483e-01, PNorm = 65.5349, GNorm = 2.0024, lr_0 = 3.0288e-04
Loss = 5.3816e-01, PNorm = 65.5649, GNorm = 1.3284, lr_0 = 3.0662e-04
Loss = 5.3303e-01, PNorm = 65.5949, GNorm = 1.4239, lr_0 = 3.1037e-04
Loss = 4.8581e-01, PNorm = 65.6262, GNorm = 1.7146, lr_0 = 3.1413e-04
Loss = 4.8644e-01, PNorm = 65.6501, GNorm = 1.7514, lr_0 = 3.1788e-04
Loss = 5.9170e-01, PNorm = 65.6811, GNorm = 1.6409, lr_0 = 3.2163e-04
Loss = 5.3952e-01, PNorm = 65.7141, GNorm = 1.1757, lr_0 = 3.2538e-04
Loss = 5.2969e-01, PNorm = 65.7456, GNorm = 1.3969, lr_0 = 3.2912e-04
Loss = 6.0155e-01, PNorm = 65.7791, GNorm = 2.8110, lr_0 = 3.3288e-04
Loss = 4.8085e-01, PNorm = 65.8116, GNorm = 1.3886, lr_0 = 3.3663e-04
Loss = 5.3743e-01, PNorm = 65.8425, GNorm = 1.3684, lr_0 = 3.4038e-04
Loss = 5.4718e-01, PNorm = 65.8743, GNorm = 1.5854, lr_0 = 3.4413e-04
Loss = 6.1990e-01, PNorm = 65.9064, GNorm = 1.3332, lr_0 = 3.4787e-04
Loss = 5.2577e-01, PNorm = 65.9388, GNorm = 1.7699, lr_0 = 3.5162e-04
Loss = 5.5158e-01, PNorm = 65.9750, GNorm = 1.6932, lr_0 = 3.5538e-04
Loss = 5.2121e-01, PNorm = 66.0068, GNorm = 1.6076, lr_0 = 3.5913e-04
Loss = 5.1130e-01, PNorm = 66.0450, GNorm = 1.4981, lr_0 = 3.6288e-04
Loss = 5.7601e-01, PNorm = 66.0809, GNorm = 2.1056, lr_0 = 3.6662e-04
Loss = 5.3173e-01, PNorm = 66.1206, GNorm = 2.0344, lr_0 = 3.7037e-04
Loss = 5.4703e-01, PNorm = 66.1609, GNorm = 1.5148, lr_0 = 3.7413e-04
Loss = 5.3269e-01, PNorm = 66.1960, GNorm = 1.2537, lr_0 = 3.7788e-04
Loss = 5.1961e-01, PNorm = 66.2342, GNorm = 1.5384, lr_0 = 3.8163e-04
Loss = 5.6033e-01, PNorm = 66.2766, GNorm = 1.5652, lr_0 = 3.8537e-04
Loss = 5.7492e-01, PNorm = 66.3207, GNorm = 1.9040, lr_0 = 3.8912e-04
Loss = 4.7378e-01, PNorm = 66.3642, GNorm = 1.9001, lr_0 = 3.9287e-04
Loss = 5.1210e-01, PNorm = 66.4055, GNorm = 1.5274, lr_0 = 3.9663e-04
Loss = 5.1675e-01, PNorm = 66.4493, GNorm = 1.7876, lr_0 = 4.0038e-04
Loss = 5.4995e-01, PNorm = 66.4876, GNorm = 1.5823, lr_0 = 4.0413e-04
Loss = 5.2273e-01, PNorm = 66.5310, GNorm = 1.3660, lr_0 = 4.0787e-04
Loss = 5.2217e-01, PNorm = 66.5743, GNorm = 1.3577, lr_0 = 4.1162e-04
Loss = 5.7040e-01, PNorm = 66.6162, GNorm = 1.3496, lr_0 = 4.1537e-04
Loss = 5.6374e-01, PNorm = 66.6617, GNorm = 0.9963, lr_0 = 4.1913e-04
Loss = 5.2037e-01, PNorm = 66.7128, GNorm = 1.7921, lr_0 = 4.2288e-04
Loss = 5.3543e-01, PNorm = 66.7621, GNorm = 1.1987, lr_0 = 4.2662e-04
Loss = 5.6579e-01, PNorm = 66.8149, GNorm = 1.7560, lr_0 = 4.3037e-04
Loss = 5.0459e-01, PNorm = 66.8647, GNorm = 1.4067, lr_0 = 4.3412e-04
Loss = 4.6815e-01, PNorm = 66.9105, GNorm = 1.1931, lr_0 = 4.3788e-04
Loss = 4.9743e-01, PNorm = 66.9583, GNorm = 1.5447, lr_0 = 4.4163e-04
Loss = 5.0086e-01, PNorm = 67.0020, GNorm = 1.2759, lr_0 = 4.4538e-04
Loss = 4.9076e-01, PNorm = 67.0464, GNorm = 1.4307, lr_0 = 4.4912e-04
Loss = 5.6002e-01, PNorm = 67.0902, GNorm = 1.1680, lr_0 = 4.5287e-04
Loss = 5.1529e-01, PNorm = 67.1426, GNorm = 1.3556, lr_0 = 4.5662e-04
Loss = 5.5153e-01, PNorm = 67.1913, GNorm = 1.7697, lr_0 = 4.6038e-04
Loss = 5.4076e-01, PNorm = 67.2381, GNorm = 1.8667, lr_0 = 4.6413e-04
Loss = 4.7336e-01, PNorm = 67.2962, GNorm = 1.0909, lr_0 = 4.6787e-04
Loss = 5.5553e-01, PNorm = 67.3495, GNorm = 1.4253, lr_0 = 4.7162e-04
Loss = 4.6189e-01, PNorm = 67.4029, GNorm = 1.3339, lr_0 = 4.7537e-04
Loss = 5.5851e-01, PNorm = 67.4598, GNorm = 1.1453, lr_0 = 4.7913e-04
Loss = 5.3727e-01, PNorm = 67.5202, GNorm = 1.2017, lr_0 = 4.8288e-04
Loss = 4.9593e-01, PNorm = 67.5825, GNorm = 1.1327, lr_0 = 4.8663e-04
Loss = 4.6361e-01, PNorm = 67.6387, GNorm = 1.2237, lr_0 = 4.9038e-04
Loss = 4.4419e-01, PNorm = 67.6910, GNorm = 1.0544, lr_0 = 4.9412e-04
Loss = 4.4677e-01, PNorm = 67.7465, GNorm = 1.3063, lr_0 = 4.9788e-04
Loss = 5.4144e-01, PNorm = 67.8024, GNorm = 1.2752, lr_0 = 5.0163e-04
Loss = 4.1774e-01, PNorm = 67.8653, GNorm = 1.1679, lr_0 = 5.0538e-04
Loss = 4.7735e-01, PNorm = 67.9223, GNorm = 1.4466, lr_0 = 5.0913e-04
Loss = 5.2001e-01, PNorm = 67.9734, GNorm = 1.1878, lr_0 = 5.1287e-04
Loss = 5.0244e-01, PNorm = 68.0304, GNorm = 1.2961, lr_0 = 5.1663e-04
Loss = 5.5317e-01, PNorm = 68.0962, GNorm = 1.2796, lr_0 = 5.2038e-04
Loss = 4.9817e-01, PNorm = 68.1568, GNorm = 1.4516, lr_0 = 5.2413e-04
Loss = 5.2275e-01, PNorm = 68.2236, GNorm = 0.9819, lr_0 = 5.2788e-04
Loss = 5.4286e-01, PNorm = 68.2813, GNorm = 1.1845, lr_0 = 5.3162e-04
Loss = 4.9178e-01, PNorm = 68.3506, GNorm = 1.3073, lr_0 = 5.3538e-04
Loss = 4.5357e-01, PNorm = 68.4142, GNorm = 1.2508, lr_0 = 5.3912e-04
Loss = 5.1102e-01, PNorm = 68.4754, GNorm = 1.2597, lr_0 = 5.4288e-04
Loss = 4.6091e-01, PNorm = 68.5481, GNorm = 1.7488, lr_0 = 5.4663e-04
Loss = 5.2714e-01, PNorm = 68.6124, GNorm = 1.3938, lr_0 = 5.5038e-04
Validation mae = 0.128551
Epoch 1
Loss = 3.7336e-01, PNorm = 68.6913, GNorm = 1.0695, lr_0 = 5.5413e-04
Loss = 4.0549e-01, PNorm = 68.7617, GNorm = 1.0779, lr_0 = 5.5787e-04
Loss = 3.7280e-01, PNorm = 68.8227, GNorm = 0.8053, lr_0 = 5.6163e-04
Loss = 4.3152e-01, PNorm = 68.8958, GNorm = 1.3692, lr_0 = 5.6538e-04
Loss = 4.1299e-01, PNorm = 68.9700, GNorm = 1.4079, lr_0 = 5.6913e-04
Loss = 3.7149e-01, PNorm = 69.0424, GNorm = 0.9032, lr_0 = 5.7288e-04
Loss = 3.7041e-01, PNorm = 69.1310, GNorm = 1.1972, lr_0 = 5.7662e-04
Loss = 3.7400e-01, PNorm = 69.2130, GNorm = 1.0452, lr_0 = 5.8038e-04
Loss = 3.7979e-01, PNorm = 69.2971, GNorm = 1.3904, lr_0 = 5.8413e-04
Loss = 3.3319e-01, PNorm = 69.3920, GNorm = 1.2740, lr_0 = 5.8788e-04
Loss = 3.9813e-01, PNorm = 69.4902, GNorm = 1.5378, lr_0 = 5.9163e-04
Loss = 3.6471e-01, PNorm = 69.5850, GNorm = 1.2418, lr_0 = 5.9538e-04
Loss = 3.6412e-01, PNorm = 69.6860, GNorm = 1.6833, lr_0 = 5.9913e-04
Loss = 3.9073e-01, PNorm = 69.7979, GNorm = 1.2716, lr_0 = 6.0288e-04
Loss = 3.4127e-01, PNorm = 69.9039, GNorm = 1.5431, lr_0 = 6.0663e-04
Loss = 3.6768e-01, PNorm = 70.0124, GNorm = 1.4304, lr_0 = 6.1038e-04
Loss = 3.8624e-01, PNorm = 70.1127, GNorm = 1.1025, lr_0 = 6.1413e-04
Loss = 4.4115e-01, PNorm = 70.2178, GNorm = 1.7305, lr_0 = 6.1788e-04
Loss = 4.3447e-01, PNorm = 70.3343, GNorm = 1.1215, lr_0 = 6.2163e-04
Loss = 3.9837e-01, PNorm = 70.4518, GNorm = 1.3209, lr_0 = 6.2538e-04
Loss = 3.8268e-01, PNorm = 70.5732, GNorm = 1.2847, lr_0 = 6.2913e-04
Loss = 3.7847e-01, PNorm = 70.6794, GNorm = 1.2428, lr_0 = 6.3288e-04
Loss = 3.9054e-01, PNorm = 70.7894, GNorm = 1.1336, lr_0 = 6.3663e-04
Loss = 4.2510e-01, PNorm = 70.8959, GNorm = 1.9569, lr_0 = 6.4038e-04
Loss = 3.6126e-01, PNorm = 71.0142, GNorm = 0.9468, lr_0 = 6.4413e-04
Loss = 3.7842e-01, PNorm = 71.1290, GNorm = 1.2447, lr_0 = 6.4788e-04
Loss = 3.2549e-01, PNorm = 71.2509, GNorm = 1.3160, lr_0 = 6.5163e-04
Loss = 3.7081e-01, PNorm = 71.3610, GNorm = 1.1659, lr_0 = 6.5538e-04
Loss = 4.0431e-01, PNorm = 71.4868, GNorm = 1.4994, lr_0 = 6.5913e-04
Loss = 3.9128e-01, PNorm = 71.6082, GNorm = 1.1436, lr_0 = 6.6288e-04
Loss = 3.8389e-01, PNorm = 71.7233, GNorm = 1.2846, lr_0 = 6.6663e-04
Loss = 4.2345e-01, PNorm = 71.8353, GNorm = 1.3364, lr_0 = 6.7038e-04
Loss = 3.7095e-01, PNorm = 71.9502, GNorm = 1.5559, lr_0 = 6.7413e-04
Loss = 3.6895e-01, PNorm = 72.0648, GNorm = 1.2094, lr_0 = 6.7788e-04
Loss = 4.4385e-01, PNorm = 72.1976, GNorm = 1.4323, lr_0 = 6.8163e-04
Loss = 4.1702e-01, PNorm = 72.3305, GNorm = 1.3547, lr_0 = 6.8538e-04
Loss = 4.9314e-01, PNorm = 72.4537, GNorm = 1.2777, lr_0 = 6.8913e-04
Loss = 4.3682e-01, PNorm = 72.6030, GNorm = 1.3945, lr_0 = 6.9288e-04
Loss = 3.6998e-01, PNorm = 72.7175, GNorm = 1.4383, lr_0 = 6.9663e-04
Loss = 4.1079e-01, PNorm = 72.8426, GNorm = 1.5466, lr_0 = 7.0038e-04
Loss = 5.2343e-01, PNorm = 72.9732, GNorm = 1.3636, lr_0 = 7.0413e-04
Loss = 4.3481e-01, PNorm = 73.0969, GNorm = 1.6549, lr_0 = 7.0788e-04
Loss = 4.1854e-01, PNorm = 73.2256, GNorm = 1.2343, lr_0 = 7.1163e-04
Loss = 3.9317e-01, PNorm = 73.3589, GNorm = 0.9493, lr_0 = 7.1538e-04
Loss = 4.2669e-01, PNorm = 73.4890, GNorm = 1.5461, lr_0 = 7.1913e-04
Loss = 4.1974e-01, PNorm = 73.6108, GNorm = 1.4865, lr_0 = 7.2288e-04
Loss = 4.2672e-01, PNorm = 73.7366, GNorm = 1.1631, lr_0 = 7.2663e-04
Loss = 3.6517e-01, PNorm = 73.8606, GNorm = 1.2578, lr_0 = 7.3038e-04
Loss = 4.0884e-01, PNorm = 73.9860, GNorm = 1.0375, lr_0 = 7.3413e-04
Loss = 4.4552e-01, PNorm = 74.1149, GNorm = 1.2947, lr_0 = 7.3788e-04
Loss = 3.8253e-01, PNorm = 74.2442, GNorm = 1.3349, lr_0 = 7.4163e-04
Loss = 3.7795e-01, PNorm = 74.3850, GNorm = 1.4155, lr_0 = 7.4538e-04
Loss = 4.0525e-01, PNorm = 74.5081, GNorm = 0.9430, lr_0 = 7.4913e-04
Loss = 4.6362e-01, PNorm = 74.6512, GNorm = 1.3763, lr_0 = 7.5288e-04
Loss = 3.8354e-01, PNorm = 74.7806, GNorm = 1.3692, lr_0 = 7.5663e-04
Loss = 4.2518e-01, PNorm = 74.9051, GNorm = 0.9816, lr_0 = 7.6038e-04
Loss = 3.5469e-01, PNorm = 75.0308, GNorm = 1.0810, lr_0 = 7.6413e-04
Loss = 4.0074e-01, PNorm = 75.1520, GNorm = 1.1851, lr_0 = 7.6788e-04
Loss = 4.3558e-01, PNorm = 75.2826, GNorm = 1.4445, lr_0 = 7.7163e-04
Loss = 4.1235e-01, PNorm = 75.4132, GNorm = 1.0617, lr_0 = 7.7538e-04
Loss = 4.0744e-01, PNorm = 75.5579, GNorm = 1.2832, lr_0 = 7.7913e-04
Loss = 4.4762e-01, PNorm = 75.6834, GNorm = 1.3117, lr_0 = 7.8288e-04
Loss = 3.8609e-01, PNorm = 75.8252, GNorm = 1.1810, lr_0 = 7.8663e-04
Loss = 4.2562e-01, PNorm = 75.9645, GNorm = 1.2039, lr_0 = 7.9038e-04
Loss = 4.0429e-01, PNorm = 76.0968, GNorm = 0.6980, lr_0 = 7.9413e-04
Loss = 4.4055e-01, PNorm = 76.2285, GNorm = 0.9027, lr_0 = 7.9788e-04
Loss = 4.1580e-01, PNorm = 76.3622, GNorm = 0.9673, lr_0 = 8.0163e-04
Loss = 3.9749e-01, PNorm = 76.4871, GNorm = 1.0887, lr_0 = 8.0538e-04
Loss = 4.8666e-01, PNorm = 76.6372, GNorm = 1.1357, lr_0 = 8.0913e-04
Loss = 4.3767e-01, PNorm = 76.7791, GNorm = 1.3708, lr_0 = 8.1288e-04
Loss = 4.5181e-01, PNorm = 76.9449, GNorm = 1.5209, lr_0 = 8.1663e-04
Loss = 4.6868e-01, PNorm = 77.0936, GNorm = 1.1437, lr_0 = 8.2038e-04
Loss = 4.3819e-01, PNorm = 77.2519, GNorm = 1.3970, lr_0 = 8.2413e-04
Loss = 4.2380e-01, PNorm = 77.3992, GNorm = 1.1374, lr_0 = 8.2788e-04
Loss = 4.1237e-01, PNorm = 77.5331, GNorm = 0.8808, lr_0 = 8.3163e-04
Loss = 4.6918e-01, PNorm = 77.6818, GNorm = 1.0634, lr_0 = 8.3538e-04
Loss = 3.7232e-01, PNorm = 77.8203, GNorm = 0.8923, lr_0 = 8.3913e-04
Loss = 4.2657e-01, PNorm = 77.9668, GNorm = 1.0167, lr_0 = 8.4288e-04
Loss = 4.5341e-01, PNorm = 78.1244, GNorm = 2.3029, lr_0 = 8.4663e-04
Loss = 4.0408e-01, PNorm = 78.2745, GNorm = 1.3334, lr_0 = 8.5038e-04
Loss = 5.7042e-01, PNorm = 78.4458, GNorm = 1.2143, lr_0 = 8.5413e-04
Loss = 4.0809e-01, PNorm = 78.6014, GNorm = 0.9706, lr_0 = 8.5788e-04
Loss = 4.2496e-01, PNorm = 78.7515, GNorm = 1.2616, lr_0 = 8.6163e-04
Loss = 4.5693e-01, PNorm = 78.9008, GNorm = 1.5509, lr_0 = 8.6538e-04
Loss = 4.3008e-01, PNorm = 79.0486, GNorm = 1.0644, lr_0 = 8.6913e-04
Loss = 4.1372e-01, PNorm = 79.1918, GNorm = 1.4564, lr_0 = 8.7288e-04
Loss = 4.4566e-01, PNorm = 79.3106, GNorm = 1.3541, lr_0 = 8.7663e-04
Loss = 4.5772e-01, PNorm = 79.4605, GNorm = 2.0799, lr_0 = 8.8038e-04
Loss = 4.7832e-01, PNorm = 79.6056, GNorm = 1.0130, lr_0 = 8.8413e-04
Loss = 3.9865e-01, PNorm = 79.7560, GNorm = 0.8981, lr_0 = 8.8788e-04
Loss = 4.6032e-01, PNorm = 79.9025, GNorm = 1.5285, lr_0 = 8.9163e-04
Loss = 5.0585e-01, PNorm = 80.0541, GNorm = 1.8800, lr_0 = 8.9538e-04
Loss = 4.4990e-01, PNorm = 80.2149, GNorm = 2.1738, lr_0 = 8.9913e-04
Loss = 4.0035e-01, PNorm = 80.3714, GNorm = 0.9005, lr_0 = 9.0288e-04
Loss = 4.0589e-01, PNorm = 80.5303, GNorm = 1.2504, lr_0 = 9.0663e-04
Loss = 4.8191e-01, PNorm = 80.6790, GNorm = 1.6105, lr_0 = 9.1038e-04
Loss = 4.4365e-01, PNorm = 80.8360, GNorm = 1.1195, lr_0 = 9.1413e-04
Loss = 4.1630e-01, PNorm = 80.9858, GNorm = 0.9823, lr_0 = 9.1788e-04
Loss = 4.2111e-01, PNorm = 81.1330, GNorm = 0.8668, lr_0 = 9.2163e-04
Loss = 4.0396e-01, PNorm = 81.2657, GNorm = 1.1985, lr_0 = 9.2538e-04
Loss = 4.5380e-01, PNorm = 81.3969, GNorm = 1.1090, lr_0 = 9.2913e-04
Loss = 4.3403e-01, PNorm = 81.5448, GNorm = 1.3086, lr_0 = 9.3288e-04
Loss = 4.0651e-01, PNorm = 81.7049, GNorm = 1.0567, lr_0 = 9.3663e-04
Loss = 4.5766e-01, PNorm = 81.8687, GNorm = 1.1094, lr_0 = 9.4038e-04
Loss = 3.9756e-01, PNorm = 82.0161, GNorm = 1.2587, lr_0 = 9.4413e-04
Loss = 3.8177e-01, PNorm = 82.1776, GNorm = 1.0222, lr_0 = 9.4788e-04
Loss = 4.7645e-01, PNorm = 82.3263, GNorm = 0.9489, lr_0 = 9.5163e-04
Loss = 3.8557e-01, PNorm = 82.4719, GNorm = 0.8338, lr_0 = 9.5538e-04
Loss = 4.3748e-01, PNorm = 82.6238, GNorm = 0.8302, lr_0 = 9.5913e-04
Loss = 4.0480e-01, PNorm = 82.7677, GNorm = 0.9143, lr_0 = 9.6288e-04
Loss = 4.3686e-01, PNorm = 82.9060, GNorm = 1.2864, lr_0 = 9.6663e-04
Loss = 4.6806e-01, PNorm = 83.0445, GNorm = 1.0515, lr_0 = 9.7038e-04
Loss = 4.6074e-01, PNorm = 83.1871, GNorm = 1.0572, lr_0 = 9.7413e-04
Loss = 4.5849e-01, PNorm = 83.3380, GNorm = 1.2614, lr_0 = 9.7788e-04
Loss = 3.7955e-01, PNorm = 83.4907, GNorm = 1.3952, lr_0 = 9.8163e-04
Loss = 5.2967e-01, PNorm = 83.6532, GNorm = 0.8954, lr_0 = 9.8537e-04
Loss = 4.5979e-01, PNorm = 83.8337, GNorm = 1.4463, lr_0 = 9.8912e-04
Loss = 4.4652e-01, PNorm = 84.0202, GNorm = 1.3992, lr_0 = 9.9288e-04
Loss = 4.2127e-01, PNorm = 84.2093, GNorm = 1.3260, lr_0 = 9.9663e-04
Loss = 4.4711e-01, PNorm = 84.3837, GNorm = 0.7049, lr_0 = 9.9993e-04
Validation mae = 0.128738
Epoch 2
Loss = 3.0571e-01, PNorm = 84.5435, GNorm = 1.2513, lr_0 = 9.9925e-04
Loss = 3.3898e-01, PNorm = 84.6891, GNorm = 0.8755, lr_0 = 9.9856e-04
Loss = 2.6778e-01, PNorm = 84.8499, GNorm = 0.7386, lr_0 = 9.9788e-04
Loss = 2.3608e-01, PNorm = 85.0074, GNorm = 0.7797, lr_0 = 9.9719e-04
Loss = 2.5244e-01, PNorm = 85.1652, GNorm = 1.0599, lr_0 = 9.9651e-04
Loss = 2.2715e-01, PNorm = 85.3096, GNorm = 0.6068, lr_0 = 9.9583e-04
Loss = 2.8231e-01, PNorm = 85.4651, GNorm = 1.0324, lr_0 = 9.9515e-04
Loss = 2.8969e-01, PNorm = 85.6319, GNorm = 0.9517, lr_0 = 9.9446e-04
Loss = 2.9917e-01, PNorm = 85.8064, GNorm = 0.8848, lr_0 = 9.9378e-04
Loss = 2.5654e-01, PNorm = 85.9947, GNorm = 0.6360, lr_0 = 9.9310e-04
Loss = 2.4349e-01, PNorm = 86.1749, GNorm = 0.7575, lr_0 = 9.9242e-04
Loss = 2.9030e-01, PNorm = 86.3332, GNorm = 1.0061, lr_0 = 9.9174e-04
Loss = 2.8972e-01, PNorm = 86.5075, GNorm = 0.8780, lr_0 = 9.9106e-04
Loss = 2.7407e-01, PNorm = 86.6624, GNorm = 1.1729, lr_0 = 9.9038e-04
Loss = 3.0948e-01, PNorm = 86.8379, GNorm = 0.9595, lr_0 = 9.8971e-04
Loss = 2.6718e-01, PNorm = 86.9977, GNorm = 1.0289, lr_0 = 9.8903e-04
Loss = 2.8504e-01, PNorm = 87.1660, GNorm = 0.9665, lr_0 = 9.8835e-04
Loss = 2.6539e-01, PNorm = 87.3429, GNorm = 1.4436, lr_0 = 9.8767e-04
Loss = 3.0577e-01, PNorm = 87.4976, GNorm = 2.2317, lr_0 = 9.8700e-04
Loss = 3.0032e-01, PNorm = 87.6829, GNorm = 1.4591, lr_0 = 9.8632e-04
Loss = 3.2104e-01, PNorm = 87.8559, GNorm = 0.9257, lr_0 = 9.8564e-04
Loss = 3.0256e-01, PNorm = 88.0456, GNorm = 0.7060, lr_0 = 9.8497e-04
Loss = 2.9822e-01, PNorm = 88.2297, GNorm = 0.9407, lr_0 = 9.8429e-04
Loss = 2.8187e-01, PNorm = 88.4065, GNorm = 1.2560, lr_0 = 9.8362e-04
Loss = 2.9540e-01, PNorm = 88.5970, GNorm = 1.3291, lr_0 = 9.8295e-04
Loss = 3.0812e-01, PNorm = 88.7680, GNorm = 1.0458, lr_0 = 9.8227e-04
Loss = 3.0190e-01, PNorm = 88.9634, GNorm = 1.2725, lr_0 = 9.8160e-04
Loss = 2.6832e-01, PNorm = 89.1318, GNorm = 0.8041, lr_0 = 9.8093e-04
Loss = 2.6090e-01, PNorm = 89.2977, GNorm = 0.7866, lr_0 = 9.8026e-04
Loss = 2.7773e-01, PNorm = 89.4637, GNorm = 1.0944, lr_0 = 9.7958e-04
Loss = 2.7349e-01, PNorm = 89.6311, GNorm = 1.1464, lr_0 = 9.7891e-04
Loss = 2.8351e-01, PNorm = 89.7960, GNorm = 1.0056, lr_0 = 9.7824e-04
Loss = 2.6432e-01, PNorm = 89.9679, GNorm = 0.9665, lr_0 = 9.7757e-04
Loss = 3.0256e-01, PNorm = 90.1442, GNorm = 1.0608, lr_0 = 9.7690e-04
Loss = 3.0556e-01, PNorm = 90.3168, GNorm = 0.9696, lr_0 = 9.7623e-04
Loss = 2.7236e-01, PNorm = 90.4865, GNorm = 0.9893, lr_0 = 9.7556e-04
Loss = 2.7598e-01, PNorm = 90.6406, GNorm = 1.0215, lr_0 = 9.7490e-04
Loss = 2.8458e-01, PNorm = 90.8010, GNorm = 0.9151, lr_0 = 9.7423e-04
Loss = 3.3763e-01, PNorm = 90.9741, GNorm = 1.0112, lr_0 = 9.7356e-04
Loss = 2.9655e-01, PNorm = 91.1366, GNorm = 1.0438, lr_0 = 9.7289e-04
Loss = 2.7505e-01, PNorm = 91.3047, GNorm = 0.8614, lr_0 = 9.7223e-04
Loss = 2.8125e-01, PNorm = 91.4526, GNorm = 0.8671, lr_0 = 9.7156e-04
Loss = 3.3622e-01, PNorm = 91.6230, GNorm = 1.1811, lr_0 = 9.7090e-04
Loss = 3.1478e-01, PNorm = 91.7878, GNorm = 1.3583, lr_0 = 9.7023e-04
Loss = 3.1874e-01, PNorm = 91.9773, GNorm = 0.9716, lr_0 = 9.6957e-04
Loss = 3.0156e-01, PNorm = 92.1418, GNorm = 0.9818, lr_0 = 9.6890e-04
Loss = 3.4491e-01, PNorm = 92.3241, GNorm = 1.4358, lr_0 = 9.6824e-04
Loss = 2.7175e-01, PNorm = 92.4821, GNorm = 0.9390, lr_0 = 9.6757e-04
Loss = 3.3522e-01, PNorm = 92.6474, GNorm = 0.9510, lr_0 = 9.6691e-04
Loss = 3.4366e-01, PNorm = 92.8057, GNorm = 1.0059, lr_0 = 9.6625e-04
Loss = 2.9209e-01, PNorm = 92.9686, GNorm = 1.0692, lr_0 = 9.6559e-04
Loss = 3.3823e-01, PNorm = 93.1496, GNorm = 0.9991, lr_0 = 9.6493e-04
Loss = 3.0097e-01, PNorm = 93.3093, GNorm = 1.5904, lr_0 = 9.6427e-04
Loss = 3.0166e-01, PNorm = 93.4947, GNorm = 1.5915, lr_0 = 9.6360e-04
Loss = 3.3725e-01, PNorm = 93.6527, GNorm = 0.9370, lr_0 = 9.6294e-04
Loss = 3.0462e-01, PNorm = 93.8173, GNorm = 1.0507, lr_0 = 9.6228e-04
Loss = 3.3594e-01, PNorm = 93.9937, GNorm = 1.2512, lr_0 = 9.6163e-04
Loss = 3.5948e-01, PNorm = 94.1703, GNorm = 0.9946, lr_0 = 9.6097e-04
Loss = 3.5240e-01, PNorm = 94.3605, GNorm = 1.2288, lr_0 = 9.6031e-04
Loss = 3.4217e-01, PNorm = 94.5550, GNorm = 1.0459, lr_0 = 9.5965e-04
Loss = 3.0086e-01, PNorm = 94.7477, GNorm = 1.4851, lr_0 = 9.5899e-04
Loss = 3.2774e-01, PNorm = 94.9225, GNorm = 1.6954, lr_0 = 9.5834e-04
Loss = 3.4995e-01, PNorm = 95.0953, GNorm = 0.9954, lr_0 = 9.5768e-04
Loss = 3.7854e-01, PNorm = 95.2808, GNorm = 1.3448, lr_0 = 9.5702e-04
Loss = 3.0996e-01, PNorm = 95.4489, GNorm = 1.5279, lr_0 = 9.5637e-04
Loss = 3.4639e-01, PNorm = 95.6122, GNorm = 1.8942, lr_0 = 9.5571e-04
Loss = 3.0718e-01, PNorm = 95.7742, GNorm = 1.0426, lr_0 = 9.5506e-04
Loss = 3.8502e-01, PNorm = 95.9225, GNorm = 0.9046, lr_0 = 9.5440e-04
Loss = 3.1708e-01, PNorm = 96.0804, GNorm = 1.0556, lr_0 = 9.5375e-04
Loss = 3.1081e-01, PNorm = 96.2093, GNorm = 0.8111, lr_0 = 9.5310e-04
Loss = 3.3865e-01, PNorm = 96.3486, GNorm = 1.1686, lr_0 = 9.5244e-04
Loss = 3.1963e-01, PNorm = 96.4940, GNorm = 0.8120, lr_0 = 9.5179e-04
Loss = 3.1708e-01, PNorm = 96.6354, GNorm = 1.2265, lr_0 = 9.5114e-04
Loss = 3.6150e-01, PNorm = 96.7907, GNorm = 0.9443, lr_0 = 9.5049e-04
Loss = 3.1833e-01, PNorm = 96.9390, GNorm = 1.0732, lr_0 = 9.4984e-04
Loss = 3.5047e-01, PNorm = 97.0948, GNorm = 1.1030, lr_0 = 9.4919e-04
Loss = 3.4382e-01, PNorm = 97.2557, GNorm = 0.8072, lr_0 = 9.4854e-04
Loss = 3.3695e-01, PNorm = 97.4077, GNorm = 1.1436, lr_0 = 9.4789e-04
Loss = 3.6399e-01, PNorm = 97.5680, GNorm = 1.4373, lr_0 = 9.4724e-04
Loss = 3.5008e-01, PNorm = 97.7305, GNorm = 1.4284, lr_0 = 9.4659e-04
Loss = 3.6329e-01, PNorm = 97.9009, GNorm = 0.8921, lr_0 = 9.4594e-04
Loss = 3.4327e-01, PNorm = 98.0776, GNorm = 1.1403, lr_0 = 9.4529e-04
Loss = 3.2680e-01, PNorm = 98.2388, GNorm = 1.2185, lr_0 = 9.4464e-04
Loss = 3.1780e-01, PNorm = 98.4044, GNorm = 1.0180, lr_0 = 9.4400e-04
Loss = 3.5728e-01, PNorm = 98.5640, GNorm = 0.9722, lr_0 = 9.4335e-04
Loss = 3.3398e-01, PNorm = 98.7252, GNorm = 1.2897, lr_0 = 9.4270e-04
Loss = 3.3754e-01, PNorm = 98.8839, GNorm = 0.9686, lr_0 = 9.4206e-04
Loss = 3.0267e-01, PNorm = 99.0337, GNorm = 1.2094, lr_0 = 9.4141e-04
Loss = 3.3474e-01, PNorm = 99.1980, GNorm = 0.9777, lr_0 = 9.4077e-04
Loss = 3.1391e-01, PNorm = 99.3562, GNorm = 0.8713, lr_0 = 9.4012e-04
Loss = 2.9575e-01, PNorm = 99.4930, GNorm = 1.0635, lr_0 = 9.3948e-04
Loss = 3.2124e-01, PNorm = 99.6445, GNorm = 1.3065, lr_0 = 9.3884e-04
Loss = 3.2400e-01, PNorm = 99.7818, GNorm = 0.9363, lr_0 = 9.3819e-04
Loss = 3.6785e-01, PNorm = 99.9267, GNorm = 1.9845, lr_0 = 9.3755e-04
Loss = 2.9195e-01, PNorm = 100.0690, GNorm = 0.7102, lr_0 = 9.3691e-04
Loss = 3.4563e-01, PNorm = 100.2179, GNorm = 0.8846, lr_0 = 9.3627e-04
Loss = 3.3285e-01, PNorm = 100.3785, GNorm = 1.5585, lr_0 = 9.3562e-04
Loss = 2.9137e-01, PNorm = 100.5175, GNorm = 0.9735, lr_0 = 9.3498e-04
Loss = 3.5570e-01, PNorm = 100.6526, GNorm = 0.8671, lr_0 = 9.3434e-04
Loss = 2.9590e-01, PNorm = 100.8034, GNorm = 0.9462, lr_0 = 9.3370e-04
Loss = 3.0407e-01, PNorm = 100.9297, GNorm = 1.2873, lr_0 = 9.3306e-04
Loss = 3.6711e-01, PNorm = 101.0726, GNorm = 1.4407, lr_0 = 9.3242e-04
Loss = 3.9387e-01, PNorm = 101.2236, GNorm = 1.5287, lr_0 = 9.3178e-04
Loss = 3.2623e-01, PNorm = 101.3777, GNorm = 1.4521, lr_0 = 9.3115e-04
Loss = 3.1677e-01, PNorm = 101.5231, GNorm = 1.1447, lr_0 = 9.3051e-04
Loss = 3.5756e-01, PNorm = 101.6724, GNorm = 1.5680, lr_0 = 9.2987e-04
Loss = 3.0121e-01, PNorm = 101.8166, GNorm = 1.2844, lr_0 = 9.2923e-04
Loss = 3.5360e-01, PNorm = 101.9637, GNorm = 0.9759, lr_0 = 9.2860e-04
Loss = 3.1979e-01, PNorm = 102.0978, GNorm = 0.9748, lr_0 = 9.2796e-04
Loss = 3.4761e-01, PNorm = 102.2399, GNorm = 1.0211, lr_0 = 9.2733e-04
Loss = 3.0956e-01, PNorm = 102.3740, GNorm = 1.3871, lr_0 = 9.2669e-04
Loss = 3.0182e-01, PNorm = 102.5154, GNorm = 0.7617, lr_0 = 9.2606e-04
Loss = 3.1746e-01, PNorm = 102.6547, GNorm = 0.9085, lr_0 = 9.2542e-04
Loss = 3.2246e-01, PNorm = 102.8031, GNorm = 1.1299, lr_0 = 9.2479e-04
Loss = 3.1299e-01, PNorm = 102.9479, GNorm = 1.2864, lr_0 = 9.2415e-04
Loss = 3.5721e-01, PNorm = 103.0930, GNorm = 1.1581, lr_0 = 9.2352e-04
Loss = 3.9947e-01, PNorm = 103.2383, GNorm = 1.5617, lr_0 = 9.2289e-04
Loss = 3.1752e-01, PNorm = 103.3677, GNorm = 1.1542, lr_0 = 9.2226e-04
Loss = 3.0089e-01, PNorm = 103.5114, GNorm = 1.2533, lr_0 = 9.2162e-04
Loss = 3.3027e-01, PNorm = 103.6551, GNorm = 1.6334, lr_0 = 9.2099e-04
Validation mae = 0.126241
Epoch 3
Loss = 2.0980e-01, PNorm = 103.7866, GNorm = 1.2985, lr_0 = 9.2036e-04
Loss = 2.0113e-01, PNorm = 103.9142, GNorm = 1.4262, lr_0 = 9.1973e-04
Loss = 1.8302e-01, PNorm = 104.0134, GNorm = 1.3390, lr_0 = 9.1910e-04
Loss = 1.7044e-01, PNorm = 104.1110, GNorm = 0.9159, lr_0 = 9.1847e-04
Loss = 1.5785e-01, PNorm = 104.2082, GNorm = 0.5561, lr_0 = 9.1784e-04
Loss = 1.5003e-01, PNorm = 104.2958, GNorm = 0.6329, lr_0 = 9.1721e-04
Loss = 1.7760e-01, PNorm = 104.3882, GNorm = 0.9175, lr_0 = 9.1658e-04
Loss = 1.7409e-01, PNorm = 104.4800, GNorm = 1.2555, lr_0 = 9.1596e-04
Loss = 1.6888e-01, PNorm = 104.5668, GNorm = 0.6728, lr_0 = 9.1533e-04
Loss = 1.6706e-01, PNorm = 104.6619, GNorm = 0.5781, lr_0 = 9.1470e-04
Loss = 1.7750e-01, PNorm = 104.7695, GNorm = 0.8931, lr_0 = 9.1408e-04
Loss = 1.8540e-01, PNorm = 104.8769, GNorm = 0.7009, lr_0 = 9.1345e-04
Loss = 1.6132e-01, PNorm = 104.9829, GNorm = 0.7126, lr_0 = 9.1282e-04
Loss = 1.8520e-01, PNorm = 105.0825, GNorm = 0.7812, lr_0 = 9.1220e-04
Loss = 1.4776e-01, PNorm = 105.1992, GNorm = 0.5418, lr_0 = 9.1157e-04
Loss = 1.6318e-01, PNorm = 105.3097, GNorm = 0.8939, lr_0 = 9.1095e-04
Loss = 1.8941e-01, PNorm = 105.4108, GNorm = 1.0023, lr_0 = 9.1032e-04
Loss = 2.0133e-01, PNorm = 105.5172, GNorm = 0.7567, lr_0 = 9.0970e-04
Loss = 1.6740e-01, PNorm = 105.6316, GNorm = 1.1208, lr_0 = 9.0908e-04
Loss = 1.7467e-01, PNorm = 105.7399, GNorm = 1.1236, lr_0 = 9.0846e-04
Loss = 1.8439e-01, PNorm = 105.8468, GNorm = 0.5926, lr_0 = 9.0783e-04
Loss = 1.8633e-01, PNorm = 105.9604, GNorm = 0.8506, lr_0 = 9.0721e-04
Loss = 1.6776e-01, PNorm = 106.0658, GNorm = 0.7816, lr_0 = 9.0659e-04
Loss = 2.2048e-01, PNorm = 106.1741, GNorm = 0.9840, lr_0 = 9.0597e-04
Loss = 1.9796e-01, PNorm = 106.2903, GNorm = 0.8016, lr_0 = 9.0535e-04
Loss = 1.7525e-01, PNorm = 106.4078, GNorm = 1.3628, lr_0 = 9.0473e-04
Loss = 1.6299e-01, PNorm = 106.5296, GNorm = 1.0169, lr_0 = 9.0411e-04
Loss = 1.7512e-01, PNorm = 106.6345, GNorm = 0.8817, lr_0 = 9.0349e-04
Loss = 1.5863e-01, PNorm = 106.7419, GNorm = 0.7216, lr_0 = 9.0287e-04
Loss = 1.6257e-01, PNorm = 106.8495, GNorm = 0.9113, lr_0 = 9.0225e-04
Loss = 1.6141e-01, PNorm = 106.9528, GNorm = 0.6784, lr_0 = 9.0163e-04
Loss = 1.8579e-01, PNorm = 107.0414, GNorm = 1.0199, lr_0 = 9.0102e-04
Loss = 2.1975e-01, PNorm = 107.1529, GNorm = 0.7700, lr_0 = 9.0040e-04
Loss = 1.7947e-01, PNorm = 107.2530, GNorm = 0.6435, lr_0 = 8.9978e-04
Loss = 1.8726e-01, PNorm = 107.3641, GNorm = 0.8396, lr_0 = 8.9916e-04
Loss = 1.9656e-01, PNorm = 107.4791, GNorm = 0.7428, lr_0 = 8.9855e-04
Loss = 2.0028e-01, PNorm = 107.5974, GNorm = 0.7802, lr_0 = 8.9793e-04
Loss = 1.6949e-01, PNorm = 107.7119, GNorm = 1.0167, lr_0 = 8.9732e-04
Loss = 1.8964e-01, PNorm = 107.8299, GNorm = 0.7811, lr_0 = 8.9670e-04
Loss = 1.8352e-01, PNorm = 107.9566, GNorm = 0.9022, lr_0 = 8.9609e-04
Loss = 1.9975e-01, PNorm = 108.0704, GNorm = 0.8334, lr_0 = 8.9548e-04
Loss = 1.9215e-01, PNorm = 108.1894, GNorm = 1.1417, lr_0 = 8.9486e-04
Loss = 1.8905e-01, PNorm = 108.3086, GNorm = 0.6212, lr_0 = 8.9425e-04
Loss = 2.3014e-01, PNorm = 108.4399, GNorm = 0.9321, lr_0 = 8.9364e-04
Loss = 2.2951e-01, PNorm = 108.5601, GNorm = 0.9702, lr_0 = 8.9302e-04
Loss = 2.0606e-01, PNorm = 108.6846, GNorm = 0.9298, lr_0 = 8.9241e-04
Loss = 1.9016e-01, PNorm = 108.8191, GNorm = 1.1121, lr_0 = 8.9180e-04
Loss = 1.8611e-01, PNorm = 108.9339, GNorm = 1.0423, lr_0 = 8.9119e-04
Loss = 2.1098e-01, PNorm = 109.0682, GNorm = 0.6197, lr_0 = 8.9058e-04
Loss = 1.9023e-01, PNorm = 109.1804, GNorm = 1.0867, lr_0 = 8.8997e-04
Loss = 2.0973e-01, PNorm = 109.2947, GNorm = 0.6272, lr_0 = 8.8936e-04
Loss = 2.4715e-01, PNorm = 109.4094, GNorm = 0.7020, lr_0 = 8.8875e-04
Loss = 1.9436e-01, PNorm = 109.5276, GNorm = 0.7038, lr_0 = 8.8814e-04
Loss = 1.7402e-01, PNorm = 109.6521, GNorm = 0.6505, lr_0 = 8.8753e-04
Loss = 2.0023e-01, PNorm = 109.7681, GNorm = 0.8617, lr_0 = 8.8693e-04
Loss = 1.8309e-01, PNorm = 109.8950, GNorm = 0.6707, lr_0 = 8.8632e-04
Loss = 2.3950e-01, PNorm = 110.0122, GNorm = 2.7621, lr_0 = 8.8571e-04
Loss = 1.7552e-01, PNorm = 110.1498, GNorm = 1.2166, lr_0 = 8.8510e-04
Loss = 2.0162e-01, PNorm = 110.3002, GNorm = 0.9231, lr_0 = 8.8450e-04
Loss = 1.8692e-01, PNorm = 110.4225, GNorm = 1.3309, lr_0 = 8.8389e-04
Loss = 1.8347e-01, PNorm = 110.5454, GNorm = 0.8453, lr_0 = 8.8329e-04
Loss = 2.2054e-01, PNorm = 110.6601, GNorm = 0.7962, lr_0 = 8.8268e-04
Loss = 2.0213e-01, PNorm = 110.7905, GNorm = 1.0807, lr_0 = 8.8208e-04
Loss = 2.4521e-01, PNorm = 110.8969, GNorm = 0.8480, lr_0 = 8.8147e-04
Loss = 2.2445e-01, PNorm = 111.0237, GNorm = 0.7822, lr_0 = 8.8087e-04
Loss = 1.8629e-01, PNorm = 111.1476, GNorm = 0.9568, lr_0 = 8.8026e-04
Loss = 2.1403e-01, PNorm = 111.2670, GNorm = 0.7956, lr_0 = 8.7966e-04
Loss = 2.1653e-01, PNorm = 111.3866, GNorm = 0.8766, lr_0 = 8.7906e-04
Loss = 1.7242e-01, PNorm = 111.5152, GNorm = 0.9358, lr_0 = 8.7846e-04
Loss = 2.2492e-01, PNorm = 111.6379, GNorm = 0.9108, lr_0 = 8.7785e-04
Loss = 2.4638e-01, PNorm = 111.7718, GNorm = 0.8489, lr_0 = 8.7725e-04
Loss = 2.1655e-01, PNorm = 111.9147, GNorm = 0.7566, lr_0 = 8.7665e-04
Loss = 2.1749e-01, PNorm = 112.0383, GNorm = 0.8536, lr_0 = 8.7605e-04
Loss = 2.3638e-01, PNorm = 112.1670, GNorm = 1.0477, lr_0 = 8.7545e-04
Loss = 2.0686e-01, PNorm = 112.3052, GNorm = 0.7941, lr_0 = 8.7485e-04
Loss = 2.1580e-01, PNorm = 112.4479, GNorm = 0.9627, lr_0 = 8.7425e-04
Loss = 2.6189e-01, PNorm = 112.5909, GNorm = 0.8669, lr_0 = 8.7365e-04
Loss = 2.0940e-01, PNorm = 112.7326, GNorm = 0.8941, lr_0 = 8.7306e-04
Loss = 2.1685e-01, PNorm = 112.8551, GNorm = 1.0672, lr_0 = 8.7246e-04
Loss = 2.2797e-01, PNorm = 112.9815, GNorm = 1.7779, lr_0 = 8.7186e-04
Loss = 2.0191e-01, PNorm = 113.1081, GNorm = 0.8784, lr_0 = 8.7126e-04
Loss = 1.9081e-01, PNorm = 113.2336, GNorm = 0.6965, lr_0 = 8.7067e-04
Loss = 2.0542e-01, PNorm = 113.3603, GNorm = 1.2164, lr_0 = 8.7007e-04
Loss = 2.2654e-01, PNorm = 113.4872, GNorm = 1.0529, lr_0 = 8.6947e-04
Loss = 1.8327e-01, PNorm = 113.6130, GNorm = 0.9771, lr_0 = 8.6888e-04
Loss = 1.8526e-01, PNorm = 113.7315, GNorm = 1.3222, lr_0 = 8.6828e-04
Loss = 2.4434e-01, PNorm = 113.8433, GNorm = 1.0413, lr_0 = 8.6769e-04
Loss = 2.0260e-01, PNorm = 113.9613, GNorm = 0.9888, lr_0 = 8.6709e-04
Loss = 2.0745e-01, PNorm = 114.0846, GNorm = 0.9535, lr_0 = 8.6650e-04
Loss = 1.7526e-01, PNorm = 114.2076, GNorm = 0.7027, lr_0 = 8.6590e-04
Loss = 1.9992e-01, PNorm = 114.3293, GNorm = 0.8591, lr_0 = 8.6531e-04
Loss = 2.1881e-01, PNorm = 114.4510, GNorm = 1.0200, lr_0 = 8.6472e-04
Loss = 2.2633e-01, PNorm = 114.5718, GNorm = 1.9264, lr_0 = 8.6413e-04
Loss = 2.0546e-01, PNorm = 114.7015, GNorm = 0.9465, lr_0 = 8.6353e-04
Loss = 2.2457e-01, PNorm = 114.8364, GNorm = 1.1817, lr_0 = 8.6294e-04
Loss = 2.3711e-01, PNorm = 114.9567, GNorm = 1.4094, lr_0 = 8.6235e-04
Loss = 2.1995e-01, PNorm = 115.0861, GNorm = 0.9623, lr_0 = 8.6176e-04
Loss = 2.2681e-01, PNorm = 115.2166, GNorm = 1.1004, lr_0 = 8.6117e-04
Loss = 2.0613e-01, PNorm = 115.3503, GNorm = 1.0583, lr_0 = 8.6058e-04
Loss = 2.1310e-01, PNorm = 115.4855, GNorm = 1.1816, lr_0 = 8.5999e-04
Loss = 2.1755e-01, PNorm = 115.6166, GNorm = 1.6789, lr_0 = 8.5940e-04
Loss = 2.3831e-01, PNorm = 115.7439, GNorm = 0.8971, lr_0 = 8.5881e-04
Loss = 1.9370e-01, PNorm = 115.8739, GNorm = 0.8278, lr_0 = 8.5823e-04
Loss = 2.1376e-01, PNorm = 116.0024, GNorm = 1.1930, lr_0 = 8.5764e-04
Loss = 2.7532e-01, PNorm = 116.1397, GNorm = 1.7050, lr_0 = 8.5705e-04
Loss = 2.1805e-01, PNorm = 116.2873, GNorm = 1.1881, lr_0 = 8.5646e-04
Loss = 2.0976e-01, PNorm = 116.4270, GNorm = 0.7650, lr_0 = 8.5588e-04
Loss = 2.2850e-01, PNorm = 116.5562, GNorm = 1.0312, lr_0 = 8.5529e-04
Loss = 2.1181e-01, PNorm = 116.6729, GNorm = 1.0367, lr_0 = 8.5470e-04
Loss = 1.7879e-01, PNorm = 116.7933, GNorm = 0.8433, lr_0 = 8.5412e-04
Loss = 2.1692e-01, PNorm = 116.9051, GNorm = 1.1161, lr_0 = 8.5353e-04
Loss = 3.1640e-01, PNorm = 117.0339, GNorm = 1.0917, lr_0 = 8.5295e-04
Loss = 2.0120e-01, PNorm = 117.1768, GNorm = 0.7666, lr_0 = 8.5236e-04
Loss = 2.4092e-01, PNorm = 117.2994, GNorm = 0.7394, lr_0 = 8.5178e-04
Loss = 2.2455e-01, PNorm = 117.4314, GNorm = 0.6935, lr_0 = 8.5120e-04
Loss = 2.0268e-01, PNorm = 117.5591, GNorm = 0.9001, lr_0 = 8.5061e-04
Loss = 2.2388e-01, PNorm = 117.6919, GNorm = 1.1805, lr_0 = 8.5003e-04
Loss = 2.1990e-01, PNorm = 117.8248, GNorm = 0.7441, lr_0 = 8.4945e-04
Loss = 2.2708e-01, PNorm = 117.9500, GNorm = 1.2080, lr_0 = 8.4887e-04
Loss = 1.9337e-01, PNorm = 118.0807, GNorm = 0.9552, lr_0 = 8.4828e-04
Validation mae = 0.124444
Epoch 4
Loss = 1.3763e-01, PNorm = 118.1816, GNorm = 0.7296, lr_0 = 8.4770e-04
Loss = 1.4736e-01, PNorm = 118.2745, GNorm = 0.8104, lr_0 = 8.4712e-04
Loss = 1.1076e-01, PNorm = 118.3502, GNorm = 0.7266, lr_0 = 8.4654e-04
Loss = 1.0869e-01, PNorm = 118.4236, GNorm = 0.5022, lr_0 = 8.4596e-04
Loss = 1.2664e-01, PNorm = 118.5005, GNorm = 0.5480, lr_0 = 8.4538e-04
Loss = 1.1277e-01, PNorm = 118.5712, GNorm = 0.8515, lr_0 = 8.4480e-04
Loss = 1.5345e-01, PNorm = 118.6457, GNorm = 0.7991, lr_0 = 8.4423e-04
Loss = 1.1548e-01, PNorm = 118.7265, GNorm = 0.9641, lr_0 = 8.4365e-04
Loss = 1.1493e-01, PNorm = 118.8148, GNorm = 0.6936, lr_0 = 8.4307e-04
Loss = 1.4700e-01, PNorm = 118.8863, GNorm = 0.6694, lr_0 = 8.4249e-04
Loss = 1.3240e-01, PNorm = 118.9793, GNorm = 1.2842, lr_0 = 8.4191e-04
Loss = 1.3062e-01, PNorm = 119.0613, GNorm = 0.5618, lr_0 = 8.4134e-04
Loss = 1.0988e-01, PNorm = 119.1431, GNorm = 0.6151, lr_0 = 8.4076e-04
Loss = 1.0710e-01, PNorm = 119.2210, GNorm = 0.6029, lr_0 = 8.4019e-04
Loss = 1.1593e-01, PNorm = 119.2980, GNorm = 0.7100, lr_0 = 8.3961e-04
Loss = 1.0198e-01, PNorm = 119.3769, GNorm = 0.6263, lr_0 = 8.3903e-04
Loss = 1.2314e-01, PNorm = 119.4516, GNorm = 0.6894, lr_0 = 8.3846e-04
Loss = 1.1740e-01, PNorm = 119.5301, GNorm = 0.6078, lr_0 = 8.3789e-04
Loss = 1.2576e-01, PNorm = 119.6063, GNorm = 0.6498, lr_0 = 8.3731e-04
Loss = 1.0758e-01, PNorm = 119.6973, GNorm = 0.7051, lr_0 = 8.3674e-04
Loss = 1.0849e-01, PNorm = 119.7745, GNorm = 0.6325, lr_0 = 8.3616e-04
Loss = 1.1814e-01, PNorm = 119.8510, GNorm = 0.6435, lr_0 = 8.3559e-04
Loss = 1.1990e-01, PNorm = 119.9279, GNorm = 0.6445, lr_0 = 8.3502e-04
Loss = 1.1255e-01, PNorm = 120.0064, GNorm = 0.6040, lr_0 = 8.3445e-04
Loss = 1.3680e-01, PNorm = 120.0835, GNorm = 0.6865, lr_0 = 8.3388e-04
Loss = 1.2999e-01, PNorm = 120.1634, GNorm = 0.5980, lr_0 = 8.3330e-04
Loss = 1.0127e-01, PNorm = 120.2467, GNorm = 0.4873, lr_0 = 8.3273e-04
Loss = 1.4636e-01, PNorm = 120.3274, GNorm = 1.2855, lr_0 = 8.3216e-04
Loss = 1.1343e-01, PNorm = 120.4001, GNorm = 0.5479, lr_0 = 8.3159e-04
Loss = 1.3544e-01, PNorm = 120.4938, GNorm = 0.8045, lr_0 = 8.3102e-04
Loss = 1.2388e-01, PNorm = 120.5773, GNorm = 0.7728, lr_0 = 8.3045e-04
Loss = 1.2911e-01, PNorm = 120.6699, GNorm = 1.0410, lr_0 = 8.2988e-04
Loss = 1.0576e-01, PNorm = 120.7612, GNorm = 0.8960, lr_0 = 8.2932e-04
Loss = 1.3987e-01, PNorm = 120.8463, GNorm = 1.1313, lr_0 = 8.2875e-04
Loss = 1.2091e-01, PNorm = 120.9311, GNorm = 0.6925, lr_0 = 8.2818e-04
Loss = 1.2499e-01, PNorm = 121.0203, GNorm = 0.9954, lr_0 = 8.2761e-04
Loss = 1.1954e-01, PNorm = 121.1090, GNorm = 0.9151, lr_0 = 8.2705e-04
Loss = 1.0349e-01, PNorm = 121.2049, GNorm = 0.6006, lr_0 = 8.2648e-04
Loss = 1.2560e-01, PNorm = 121.2808, GNorm = 0.9716, lr_0 = 8.2591e-04
Loss = 1.1683e-01, PNorm = 121.3677, GNorm = 0.7836, lr_0 = 8.2535e-04
Loss = 1.0857e-01, PNorm = 121.4548, GNorm = 0.6347, lr_0 = 8.2478e-04
Loss = 1.1113e-01, PNorm = 121.5415, GNorm = 0.7711, lr_0 = 8.2422e-04
Loss = 1.1109e-01, PNorm = 121.6273, GNorm = 0.6841, lr_0 = 8.2365e-04
Loss = 1.2445e-01, PNorm = 121.7134, GNorm = 1.0574, lr_0 = 8.2309e-04
Loss = 1.2290e-01, PNorm = 121.8027, GNorm = 0.7155, lr_0 = 8.2252e-04
Loss = 1.2346e-01, PNorm = 121.8836, GNorm = 0.4297, lr_0 = 8.2196e-04
Loss = 1.3662e-01, PNorm = 121.9646, GNorm = 0.4825, lr_0 = 8.2140e-04
Loss = 1.2785e-01, PNorm = 122.0454, GNorm = 0.7764, lr_0 = 8.2084e-04
Loss = 1.3976e-01, PNorm = 122.1386, GNorm = 0.6999, lr_0 = 8.2027e-04
Loss = 1.3291e-01, PNorm = 122.2321, GNorm = 0.9054, lr_0 = 8.1971e-04
Loss = 1.1586e-01, PNorm = 122.3279, GNorm = 0.5332, lr_0 = 8.1915e-04
Loss = 1.2876e-01, PNorm = 122.4208, GNorm = 0.6377, lr_0 = 8.1859e-04
Loss = 1.1833e-01, PNorm = 122.5029, GNorm = 0.7666, lr_0 = 8.1803e-04
Loss = 1.2364e-01, PNorm = 122.5866, GNorm = 0.7283, lr_0 = 8.1747e-04
Loss = 1.3349e-01, PNorm = 122.6782, GNorm = 0.7951, lr_0 = 8.1691e-04
Loss = 1.2183e-01, PNorm = 122.7644, GNorm = 0.6031, lr_0 = 8.1635e-04
Loss = 1.2535e-01, PNorm = 122.8570, GNorm = 0.8400, lr_0 = 8.1579e-04
Loss = 1.1478e-01, PNorm = 122.9434, GNorm = 0.7781, lr_0 = 8.1523e-04
Loss = 1.3692e-01, PNorm = 123.0296, GNorm = 0.8744, lr_0 = 8.1467e-04
Loss = 1.3776e-01, PNorm = 123.1134, GNorm = 0.6286, lr_0 = 8.1411e-04
Loss = 1.5289e-01, PNorm = 123.2048, GNorm = 0.6515, lr_0 = 8.1355e-04
Loss = 1.2389e-01, PNorm = 123.3012, GNorm = 0.6952, lr_0 = 8.1300e-04
Loss = 1.6999e-01, PNorm = 123.4034, GNorm = 1.0693, lr_0 = 8.1244e-04
Loss = 1.3205e-01, PNorm = 123.4976, GNorm = 0.8987, lr_0 = 8.1188e-04
Loss = 1.6502e-01, PNorm = 123.5978, GNorm = 0.5236, lr_0 = 8.1133e-04
Loss = 1.4000e-01, PNorm = 123.7067, GNorm = 0.6766, lr_0 = 8.1077e-04
Loss = 1.2343e-01, PNorm = 123.7921, GNorm = 0.7140, lr_0 = 8.1022e-04
Loss = 1.2982e-01, PNorm = 123.8883, GNorm = 1.0519, lr_0 = 8.0966e-04
Loss = 1.3829e-01, PNorm = 123.9789, GNorm = 0.9751, lr_0 = 8.0911e-04
Loss = 1.2757e-01, PNorm = 124.0687, GNorm = 1.1530, lr_0 = 8.0855e-04
Loss = 1.5789e-01, PNorm = 124.1668, GNorm = 0.4663, lr_0 = 8.0800e-04
Loss = 1.4126e-01, PNorm = 124.2698, GNorm = 0.9257, lr_0 = 8.0745e-04
Loss = 1.6660e-01, PNorm = 124.3737, GNorm = 0.7493, lr_0 = 8.0689e-04
Loss = 1.3163e-01, PNorm = 124.4800, GNorm = 0.6952, lr_0 = 8.0634e-04
Loss = 1.1940e-01, PNorm = 124.5845, GNorm = 0.6726, lr_0 = 8.0579e-04
Loss = 1.4451e-01, PNorm = 124.6788, GNorm = 0.6185, lr_0 = 8.0523e-04
Loss = 1.4161e-01, PNorm = 124.7737, GNorm = 1.1137, lr_0 = 8.0468e-04
Loss = 1.2981e-01, PNorm = 124.8733, GNorm = 0.9399, lr_0 = 8.0413e-04
Loss = 1.3906e-01, PNorm = 124.9548, GNorm = 0.8587, lr_0 = 8.0358e-04
Loss = 1.2933e-01, PNorm = 125.0494, GNorm = 0.6935, lr_0 = 8.0303e-04
Loss = 1.3678e-01, PNorm = 125.1454, GNorm = 0.5652, lr_0 = 8.0248e-04
Loss = 1.4503e-01, PNorm = 125.2477, GNorm = 0.8442, lr_0 = 8.0193e-04
Loss = 1.1521e-01, PNorm = 125.3431, GNorm = 0.8647, lr_0 = 8.0138e-04
Loss = 1.2911e-01, PNorm = 125.4360, GNorm = 0.9291, lr_0 = 8.0083e-04
Loss = 1.4488e-01, PNorm = 125.5325, GNorm = 1.2483, lr_0 = 8.0028e-04
Loss = 1.3731e-01, PNorm = 125.6323, GNorm = 0.8547, lr_0 = 7.9974e-04
Loss = 1.3342e-01, PNorm = 125.7351, GNorm = 1.0672, lr_0 = 7.9919e-04
Loss = 1.2937e-01, PNorm = 125.8279, GNorm = 0.9054, lr_0 = 7.9864e-04
Loss = 1.4440e-01, PNorm = 125.9205, GNorm = 0.6397, lr_0 = 7.9809e-04
Loss = 1.4285e-01, PNorm = 126.0249, GNorm = 0.6247, lr_0 = 7.9755e-04
Loss = 1.3766e-01, PNorm = 126.1238, GNorm = 0.7270, lr_0 = 7.9700e-04
Loss = 1.3097e-01, PNorm = 126.2279, GNorm = 1.3981, lr_0 = 7.9645e-04
Loss = 1.4364e-01, PNorm = 126.3352, GNorm = 0.8598, lr_0 = 7.9591e-04
Loss = 1.4883e-01, PNorm = 126.4431, GNorm = 0.5925, lr_0 = 7.9536e-04
Loss = 1.3378e-01, PNorm = 126.5476, GNorm = 0.8255, lr_0 = 7.9482e-04
Loss = 1.2662e-01, PNorm = 126.6316, GNorm = 0.8068, lr_0 = 7.9427e-04
Loss = 1.4651e-01, PNorm = 126.7276, GNorm = 0.6378, lr_0 = 7.9373e-04
Loss = 1.6879e-01, PNorm = 126.8239, GNorm = 1.0681, lr_0 = 7.9319e-04
Loss = 1.3513e-01, PNorm = 126.9230, GNorm = 0.6846, lr_0 = 7.9264e-04
Loss = 1.6298e-01, PNorm = 127.0234, GNorm = 1.4215, lr_0 = 7.9210e-04
Loss = 1.2971e-01, PNorm = 127.1330, GNorm = 1.1755, lr_0 = 7.9156e-04
Loss = 1.3201e-01, PNorm = 127.2307, GNorm = 0.8241, lr_0 = 7.9101e-04
Loss = 1.3097e-01, PNorm = 127.3380, GNorm = 0.5743, lr_0 = 7.9047e-04
Loss = 1.5795e-01, PNorm = 127.4319, GNorm = 0.9229, lr_0 = 7.8993e-04
Loss = 1.2559e-01, PNorm = 127.5438, GNorm = 0.7851, lr_0 = 7.8939e-04
Loss = 1.6881e-01, PNorm = 127.6480, GNorm = 0.6952, lr_0 = 7.8885e-04
Loss = 1.5409e-01, PNorm = 127.7585, GNorm = 0.7441, lr_0 = 7.8831e-04
Loss = 1.4909e-01, PNorm = 127.8682, GNorm = 0.8689, lr_0 = 7.8777e-04
Loss = 1.4995e-01, PNorm = 127.9811, GNorm = 0.8722, lr_0 = 7.8723e-04
Loss = 1.2968e-01, PNorm = 128.0849, GNorm = 0.7168, lr_0 = 7.8669e-04
Loss = 1.4946e-01, PNorm = 128.1964, GNorm = 1.0893, lr_0 = 7.8615e-04
Loss = 1.4089e-01, PNorm = 128.2997, GNorm = 0.8791, lr_0 = 7.8561e-04
Loss = 1.1698e-01, PNorm = 128.3954, GNorm = 0.6433, lr_0 = 7.8507e-04
Loss = 1.3080e-01, PNorm = 128.4960, GNorm = 0.9727, lr_0 = 7.8454e-04
Loss = 1.4001e-01, PNorm = 128.5996, GNorm = 0.8764, lr_0 = 7.8400e-04
Loss = 1.4538e-01, PNorm = 128.7082, GNorm = 0.9434, lr_0 = 7.8346e-04
Loss = 1.7393e-01, PNorm = 128.8121, GNorm = 0.8034, lr_0 = 7.8293e-04
Loss = 1.5687e-01, PNorm = 128.9239, GNorm = 0.6062, lr_0 = 7.8239e-04
Loss = 1.5042e-01, PNorm = 129.0280, GNorm = 0.9619, lr_0 = 7.8185e-04
Loss = 1.5942e-01, PNorm = 129.1358, GNorm = 1.3442, lr_0 = 7.8132e-04
Validation mae = 0.126427
Epoch 5
Loss = 8.1545e-02, PNorm = 129.2289, GNorm = 0.4506, lr_0 = 7.8078e-04
Loss = 9.0427e-02, PNorm = 129.3009, GNorm = 0.5328, lr_0 = 7.8025e-04
Loss = 9.0997e-02, PNorm = 129.3684, GNorm = 0.7770, lr_0 = 7.7971e-04
Loss = 8.1641e-02, PNorm = 129.4289, GNorm = 0.7858, lr_0 = 7.7918e-04
Loss = 7.6392e-02, PNorm = 129.4884, GNorm = 0.6915, lr_0 = 7.7864e-04
Loss = 8.0270e-02, PNorm = 129.5407, GNorm = 0.8871, lr_0 = 7.7811e-04
Loss = 1.0729e-01, PNorm = 129.6029, GNorm = 0.6413, lr_0 = 7.7758e-04
Loss = 8.7981e-02, PNorm = 129.6692, GNorm = 1.0789, lr_0 = 7.7705e-04
Loss = 9.3739e-02, PNorm = 129.7245, GNorm = 0.5660, lr_0 = 7.7651e-04
Loss = 8.5982e-02, PNorm = 129.7898, GNorm = 0.6552, lr_0 = 7.7598e-04
Loss = 8.4071e-02, PNorm = 129.8464, GNorm = 0.7049, lr_0 = 7.7545e-04
Loss = 1.0320e-01, PNorm = 129.9082, GNorm = 0.4780, lr_0 = 7.7492e-04
Loss = 7.1784e-02, PNorm = 129.9728, GNorm = 0.6552, lr_0 = 7.7439e-04
Loss = 7.4250e-02, PNorm = 130.0310, GNorm = 0.7881, lr_0 = 7.7386e-04
Loss = 7.5463e-02, PNorm = 130.0943, GNorm = 0.4563, lr_0 = 7.7333e-04
Loss = 8.4576e-02, PNorm = 130.1540, GNorm = 0.4962, lr_0 = 7.7280e-04
Loss = 7.6072e-02, PNorm = 130.2148, GNorm = 0.5313, lr_0 = 7.7227e-04
Loss = 8.5922e-02, PNorm = 130.2763, GNorm = 0.4054, lr_0 = 7.7174e-04
Loss = 7.7182e-02, PNorm = 130.3387, GNorm = 0.6175, lr_0 = 7.7121e-04
Loss = 8.2992e-02, PNorm = 130.3940, GNorm = 0.6118, lr_0 = 7.7068e-04
Loss = 1.0258e-01, PNorm = 130.4529, GNorm = 0.7705, lr_0 = 7.7015e-04
Loss = 9.4155e-02, PNorm = 130.5153, GNorm = 0.4085, lr_0 = 7.6963e-04
Loss = 8.1139e-02, PNorm = 130.5848, GNorm = 0.6776, lr_0 = 7.6910e-04
Loss = 8.8592e-02, PNorm = 130.6593, GNorm = 0.8457, lr_0 = 7.6857e-04
Loss = 7.4416e-02, PNorm = 130.7275, GNorm = 0.3843, lr_0 = 7.6805e-04
Loss = 7.7639e-02, PNorm = 130.7986, GNorm = 0.3842, lr_0 = 7.6752e-04
Loss = 7.8194e-02, PNorm = 130.8564, GNorm = 0.7390, lr_0 = 7.6699e-04
Loss = 7.5377e-02, PNorm = 130.9246, GNorm = 0.4400, lr_0 = 7.6647e-04
Loss = 8.1702e-02, PNorm = 130.9880, GNorm = 0.8172, lr_0 = 7.6594e-04
Loss = 6.9938e-02, PNorm = 131.0458, GNorm = 0.4852, lr_0 = 7.6542e-04
Loss = 8.8111e-02, PNorm = 131.1015, GNorm = 0.7870, lr_0 = 7.6489e-04
Loss = 8.1505e-02, PNorm = 131.1682, GNorm = 0.5550, lr_0 = 7.6437e-04
Loss = 9.0345e-02, PNorm = 131.2335, GNorm = 0.4116, lr_0 = 7.6385e-04
Loss = 7.7472e-02, PNorm = 131.3038, GNorm = 0.4884, lr_0 = 7.6332e-04
Loss = 8.6012e-02, PNorm = 131.3647, GNorm = 0.7389, lr_0 = 7.6280e-04
Loss = 7.4659e-02, PNorm = 131.4234, GNorm = 0.4857, lr_0 = 7.6228e-04
Loss = 8.5828e-02, PNorm = 131.4914, GNorm = 0.6837, lr_0 = 7.6176e-04
Loss = 8.0637e-02, PNorm = 131.5559, GNorm = 0.9514, lr_0 = 7.6123e-04
Loss = 8.5016e-02, PNorm = 131.6195, GNorm = 0.8243, lr_0 = 7.6071e-04
Loss = 7.2466e-02, PNorm = 131.6822, GNorm = 0.8728, lr_0 = 7.6019e-04
Loss = 1.0299e-01, PNorm = 131.7396, GNorm = 1.5919, lr_0 = 7.5967e-04
Loss = 8.3277e-02, PNorm = 131.8011, GNorm = 0.5386, lr_0 = 7.5915e-04
Loss = 7.3640e-02, PNorm = 131.8693, GNorm = 0.9340, lr_0 = 7.5863e-04
Loss = 6.9957e-02, PNorm = 131.9354, GNorm = 0.6474, lr_0 = 7.5811e-04
Loss = 8.6513e-02, PNorm = 132.0004, GNorm = 0.5652, lr_0 = 7.5759e-04
Loss = 8.9565e-02, PNorm = 132.0672, GNorm = 0.3885, lr_0 = 7.5707e-04
Loss = 8.4624e-02, PNorm = 132.1403, GNorm = 1.1021, lr_0 = 7.5655e-04
Loss = 7.8095e-02, PNorm = 132.2157, GNorm = 1.0496, lr_0 = 7.5603e-04
Loss = 9.5081e-02, PNorm = 132.2828, GNorm = 0.3590, lr_0 = 7.5552e-04
Loss = 8.8591e-02, PNorm = 132.3536, GNorm = 1.1553, lr_0 = 7.5500e-04
Loss = 9.1275e-02, PNorm = 132.4290, GNorm = 0.5611, lr_0 = 7.5448e-04
Loss = 8.4361e-02, PNorm = 132.5001, GNorm = 0.6335, lr_0 = 7.5397e-04
Loss = 1.0692e-01, PNorm = 132.5802, GNorm = 0.9685, lr_0 = 7.5345e-04
Loss = 9.2187e-02, PNorm = 132.6560, GNorm = 0.7226, lr_0 = 7.5293e-04
Loss = 9.2808e-02, PNorm = 132.7337, GNorm = 1.0156, lr_0 = 7.5242e-04
Loss = 8.4246e-02, PNorm = 132.8087, GNorm = 0.6480, lr_0 = 7.5190e-04
Loss = 9.2589e-02, PNorm = 132.8863, GNorm = 0.9193, lr_0 = 7.5139e-04
Loss = 9.2666e-02, PNorm = 132.9590, GNorm = 0.8462, lr_0 = 7.5087e-04
Loss = 8.5306e-02, PNorm = 133.0293, GNorm = 0.7959, lr_0 = 7.5036e-04
Loss = 9.4051e-02, PNorm = 133.1029, GNorm = 0.8528, lr_0 = 7.4984e-04
Loss = 8.1727e-02, PNorm = 133.1836, GNorm = 0.4704, lr_0 = 7.4933e-04
Loss = 8.7854e-02, PNorm = 133.2617, GNorm = 0.3759, lr_0 = 7.4882e-04
Loss = 1.0212e-01, PNorm = 133.3408, GNorm = 0.6447, lr_0 = 7.4830e-04
Loss = 8.2397e-02, PNorm = 133.4237, GNorm = 0.4746, lr_0 = 7.4779e-04
Loss = 9.2456e-02, PNorm = 133.5064, GNorm = 0.4114, lr_0 = 7.4728e-04
Loss = 8.3647e-02, PNorm = 133.5841, GNorm = 0.5400, lr_0 = 7.4677e-04
Loss = 7.4164e-02, PNorm = 133.6604, GNorm = 0.6596, lr_0 = 7.4625e-04
Loss = 8.9452e-02, PNorm = 133.7336, GNorm = 0.7307, lr_0 = 7.4574e-04
Loss = 1.0293e-01, PNorm = 133.8030, GNorm = 1.3525, lr_0 = 7.4523e-04
Loss = 9.2044e-02, PNorm = 133.8858, GNorm = 0.6073, lr_0 = 7.4472e-04
Loss = 9.7237e-02, PNorm = 133.9615, GNorm = 0.7240, lr_0 = 7.4421e-04
Loss = 9.1691e-02, PNorm = 134.0418, GNorm = 1.1335, lr_0 = 7.4370e-04
Loss = 8.9272e-02, PNorm = 134.1253, GNorm = 0.9744, lr_0 = 7.4319e-04
Loss = 8.7875e-02, PNorm = 134.2057, GNorm = 0.4729, lr_0 = 7.4268e-04
Loss = 7.6412e-02, PNorm = 134.2825, GNorm = 0.6172, lr_0 = 7.4217e-04
Loss = 1.1410e-01, PNorm = 134.3501, GNorm = 0.6619, lr_0 = 7.4167e-04
Loss = 1.0812e-01, PNorm = 134.4326, GNorm = 0.7591, lr_0 = 7.4116e-04
Loss = 8.4153e-02, PNorm = 134.5138, GNorm = 0.7397, lr_0 = 7.4065e-04
Loss = 9.5361e-02, PNorm = 134.5952, GNorm = 0.6807, lr_0 = 7.4014e-04
Loss = 9.4603e-02, PNorm = 134.6831, GNorm = 0.5159, lr_0 = 7.3964e-04
Loss = 1.0717e-01, PNorm = 134.7595, GNorm = 1.2854, lr_0 = 7.3913e-04
Loss = 8.5972e-02, PNorm = 134.8416, GNorm = 0.6548, lr_0 = 7.3862e-04
Loss = 9.2554e-02, PNorm = 134.9223, GNorm = 0.8281, lr_0 = 7.3812e-04
Loss = 9.7143e-02, PNorm = 135.0036, GNorm = 0.6887, lr_0 = 7.3761e-04
Loss = 1.0143e-01, PNorm = 135.0949, GNorm = 0.9517, lr_0 = 7.3711e-04
Loss = 8.6610e-02, PNorm = 135.1729, GNorm = 0.5874, lr_0 = 7.3660e-04
Loss = 8.9119e-02, PNorm = 135.2454, GNorm = 0.6969, lr_0 = 7.3610e-04
Loss = 9.2527e-02, PNorm = 135.3199, GNorm = 0.7940, lr_0 = 7.3559e-04
Loss = 9.1372e-02, PNorm = 135.3923, GNorm = 0.5445, lr_0 = 7.3509e-04
Loss = 8.3554e-02, PNorm = 135.4678, GNorm = 0.4319, lr_0 = 7.3458e-04
Loss = 9.5581e-02, PNorm = 135.5470, GNorm = 0.5882, lr_0 = 7.3408e-04
Loss = 7.7968e-02, PNorm = 135.6288, GNorm = 0.8135, lr_0 = 7.3358e-04
Loss = 8.7718e-02, PNorm = 135.7114, GNorm = 0.6680, lr_0 = 7.3308e-04
Loss = 9.5910e-02, PNorm = 135.7846, GNorm = 0.8127, lr_0 = 7.3257e-04
Loss = 7.9487e-02, PNorm = 135.8702, GNorm = 0.5526, lr_0 = 7.3207e-04
Loss = 8.8158e-02, PNorm = 135.9486, GNorm = 0.7489, lr_0 = 7.3157e-04
Loss = 1.0361e-01, PNorm = 136.0326, GNorm = 0.7135, lr_0 = 7.3107e-04
Loss = 9.6234e-02, PNorm = 136.1140, GNorm = 0.5293, lr_0 = 7.3057e-04
Loss = 9.8535e-02, PNorm = 136.1943, GNorm = 0.9958, lr_0 = 7.3007e-04
Loss = 9.5040e-02, PNorm = 136.2782, GNorm = 0.7481, lr_0 = 7.2957e-04
Loss = 1.0078e-01, PNorm = 136.3623, GNorm = 0.4472, lr_0 = 7.2907e-04
Loss = 1.0454e-01, PNorm = 136.4498, GNorm = 0.4793, lr_0 = 7.2857e-04
Loss = 8.3915e-02, PNorm = 136.5328, GNorm = 0.4501, lr_0 = 7.2807e-04
Loss = 8.6289e-02, PNorm = 136.6182, GNorm = 1.0148, lr_0 = 7.2757e-04
Loss = 9.1442e-02, PNorm = 136.7053, GNorm = 0.3844, lr_0 = 7.2707e-04
Loss = 1.1115e-01, PNorm = 136.7912, GNorm = 0.6877, lr_0 = 7.2657e-04
Loss = 9.3032e-02, PNorm = 136.8709, GNorm = 0.3758, lr_0 = 7.2608e-04
Loss = 8.5086e-02, PNorm = 136.9532, GNorm = 0.4533, lr_0 = 7.2558e-04
Loss = 8.6454e-02, PNorm = 137.0288, GNorm = 0.4061, lr_0 = 7.2508e-04
Loss = 9.4265e-02, PNorm = 137.1065, GNorm = 0.5582, lr_0 = 7.2458e-04
Loss = 1.0904e-01, PNorm = 137.1882, GNorm = 0.8371, lr_0 = 7.2409e-04
Loss = 1.0374e-01, PNorm = 137.2730, GNorm = 0.6065, lr_0 = 7.2359e-04
Loss = 9.6302e-02, PNorm = 137.3592, GNorm = 0.9995, lr_0 = 7.2310e-04
Loss = 8.1272e-02, PNorm = 137.4499, GNorm = 0.6981, lr_0 = 7.2260e-04
Loss = 9.6670e-02, PNorm = 137.5313, GNorm = 0.5223, lr_0 = 7.2211e-04
Loss = 9.5408e-02, PNorm = 137.6168, GNorm = 0.7448, lr_0 = 7.2161e-04
Loss = 8.7015e-02, PNorm = 137.6997, GNorm = 0.7352, lr_0 = 7.2112e-04
Loss = 9.1440e-02, PNorm = 137.7814, GNorm = 0.4657, lr_0 = 7.2062e-04
Loss = 9.2242e-02, PNorm = 137.8604, GNorm = 0.6544, lr_0 = 7.2013e-04
Loss = 1.0337e-01, PNorm = 137.9482, GNorm = 0.6014, lr_0 = 7.1964e-04
Validation mae = 0.125184
Epoch 6
Loss = 6.9364e-02, PNorm = 138.0227, GNorm = 0.4197, lr_0 = 7.1914e-04
Loss = 6.8376e-02, PNorm = 138.0808, GNorm = 0.3872, lr_0 = 7.1865e-04
Loss = 6.5046e-02, PNorm = 138.1356, GNorm = 0.9081, lr_0 = 7.1816e-04
Loss = 5.6124e-02, PNorm = 138.1844, GNorm = 0.4591, lr_0 = 7.1767e-04
Loss = 7.1699e-02, PNorm = 138.2295, GNorm = 0.4369, lr_0 = 7.1717e-04
Loss = 6.8399e-02, PNorm = 138.2802, GNorm = 0.5050, lr_0 = 7.1668e-04
Loss = 6.9228e-02, PNorm = 138.3367, GNorm = 0.5486, lr_0 = 7.1619e-04
Loss = 6.2201e-02, PNorm = 138.3905, GNorm = 0.5330, lr_0 = 7.1570e-04
Loss = 5.5413e-02, PNorm = 138.4453, GNorm = 0.5011, lr_0 = 7.1521e-04
Loss = 5.4475e-02, PNorm = 138.4928, GNorm = 0.4129, lr_0 = 7.1472e-04
Loss = 6.4927e-02, PNorm = 138.5395, GNorm = 0.4880, lr_0 = 7.1423e-04
Loss = 6.3108e-02, PNorm = 138.5917, GNorm = 0.6213, lr_0 = 7.1374e-04
Loss = 5.6540e-02, PNorm = 138.6351, GNorm = 0.4456, lr_0 = 7.1325e-04
Loss = 6.8749e-02, PNorm = 138.6893, GNorm = 0.5013, lr_0 = 7.1277e-04
Loss = 5.6013e-02, PNorm = 138.7426, GNorm = 0.5651, lr_0 = 7.1228e-04
Loss = 6.8218e-02, PNorm = 138.7944, GNorm = 0.4875, lr_0 = 7.1179e-04
Loss = 6.3677e-02, PNorm = 138.8441, GNorm = 0.3473, lr_0 = 7.1130e-04
Loss = 6.7333e-02, PNorm = 138.8926, GNorm = 0.5808, lr_0 = 7.1081e-04
Loss = 6.0979e-02, PNorm = 138.9453, GNorm = 0.6453, lr_0 = 7.1033e-04
Loss = 6.4925e-02, PNorm = 139.0006, GNorm = 1.1152, lr_0 = 7.0984e-04
Loss = 7.1989e-02, PNorm = 139.0564, GNorm = 0.7271, lr_0 = 7.0935e-04
Loss = 6.7409e-02, PNorm = 139.1163, GNorm = 0.6263, lr_0 = 7.0887e-04
Loss = 6.6223e-02, PNorm = 139.1733, GNorm = 0.4359, lr_0 = 7.0838e-04
Loss = 6.1636e-02, PNorm = 139.2385, GNorm = 0.5938, lr_0 = 7.0790e-04
Loss = 6.9166e-02, PNorm = 139.3058, GNorm = 0.5881, lr_0 = 7.0741e-04
Loss = 6.4438e-02, PNorm = 139.3642, GNorm = 0.9819, lr_0 = 7.0693e-04
Loss = 5.4917e-02, PNorm = 139.4156, GNorm = 0.3336, lr_0 = 7.0644e-04
Loss = 6.0462e-02, PNorm = 139.4689, GNorm = 0.7111, lr_0 = 7.0596e-04
Loss = 6.2091e-02, PNorm = 139.5258, GNorm = 0.7559, lr_0 = 7.0548e-04
Loss = 5.6578e-02, PNorm = 139.5740, GNorm = 0.6203, lr_0 = 7.0499e-04
Loss = 5.8984e-02, PNorm = 139.6232, GNorm = 0.5383, lr_0 = 7.0451e-04
Loss = 5.1430e-02, PNorm = 139.6754, GNorm = 0.4031, lr_0 = 7.0403e-04
Loss = 6.0926e-02, PNorm = 139.7338, GNorm = 0.6747, lr_0 = 7.0354e-04
Loss = 6.4020e-02, PNorm = 139.7861, GNorm = 0.5854, lr_0 = 7.0306e-04
Loss = 7.5887e-02, PNorm = 139.8438, GNorm = 0.4618, lr_0 = 7.0258e-04
Loss = 6.3623e-02, PNorm = 139.9045, GNorm = 0.4948, lr_0 = 7.0210e-04
Loss = 5.2429e-02, PNorm = 139.9654, GNorm = 0.5049, lr_0 = 7.0162e-04
Loss = 5.9743e-02, PNorm = 140.0248, GNorm = 1.2074, lr_0 = 7.0114e-04
Loss = 5.7837e-02, PNorm = 140.0842, GNorm = 0.5950, lr_0 = 7.0066e-04
Loss = 5.6129e-02, PNorm = 140.1424, GNorm = 0.3860, lr_0 = 7.0018e-04
Loss = 6.3483e-02, PNorm = 140.1944, GNorm = 0.8308, lr_0 = 6.9970e-04
Loss = 5.6670e-02, PNorm = 140.2499, GNorm = 0.4790, lr_0 = 6.9922e-04
Loss = 6.6808e-02, PNorm = 140.3018, GNorm = 0.5458, lr_0 = 6.9874e-04
Loss = 5.5876e-02, PNorm = 140.3589, GNorm = 0.4474, lr_0 = 6.9826e-04
Loss = 5.1909e-02, PNorm = 140.4129, GNorm = 0.3540, lr_0 = 6.9778e-04
Loss = 6.3772e-02, PNorm = 140.4684, GNorm = 0.4065, lr_0 = 6.9730e-04
Loss = 5.6813e-02, PNorm = 140.5244, GNorm = 0.5434, lr_0 = 6.9683e-04
Loss = 5.7364e-02, PNorm = 140.5833, GNorm = 0.4461, lr_0 = 6.9635e-04
Loss = 5.7255e-02, PNorm = 140.6410, GNorm = 0.7278, lr_0 = 6.9587e-04
Loss = 5.9598e-02, PNorm = 140.7059, GNorm = 0.3933, lr_0 = 6.9540e-04
Loss = 5.7323e-02, PNorm = 140.7657, GNorm = 0.5677, lr_0 = 6.9492e-04
Loss = 6.0366e-02, PNorm = 140.8153, GNorm = 1.1801, lr_0 = 6.9444e-04
Loss = 5.7821e-02, PNorm = 140.8726, GNorm = 0.5508, lr_0 = 6.9397e-04
Loss = 6.1272e-02, PNorm = 140.9291, GNorm = 0.6248, lr_0 = 6.9349e-04
Loss = 5.5423e-02, PNorm = 140.9909, GNorm = 0.4866, lr_0 = 6.9302e-04
Loss = 7.0872e-02, PNorm = 141.0498, GNorm = 0.4043, lr_0 = 6.9254e-04
Loss = 6.3232e-02, PNorm = 141.1073, GNorm = 0.4455, lr_0 = 6.9207e-04
Loss = 6.4369e-02, PNorm = 141.1685, GNorm = 0.6160, lr_0 = 6.9159e-04
Loss = 6.6765e-02, PNorm = 141.2355, GNorm = 0.8975, lr_0 = 6.9112e-04
Loss = 5.0817e-02, PNorm = 141.2935, GNorm = 0.7019, lr_0 = 6.9065e-04
Loss = 5.7808e-02, PNorm = 141.3492, GNorm = 0.3709, lr_0 = 6.9017e-04
Loss = 5.6227e-02, PNorm = 141.4070, GNorm = 0.4536, lr_0 = 6.8970e-04
Loss = 6.1748e-02, PNorm = 141.4633, GNorm = 0.4424, lr_0 = 6.8923e-04
Loss = 6.4764e-02, PNorm = 141.5199, GNorm = 0.8760, lr_0 = 6.8876e-04
Loss = 7.1237e-02, PNorm = 141.5821, GNorm = 0.3929, lr_0 = 6.8828e-04
Loss = 5.4885e-02, PNorm = 141.6409, GNorm = 0.4836, lr_0 = 6.8781e-04
Loss = 7.9700e-02, PNorm = 141.7023, GNorm = 0.5631, lr_0 = 6.8734e-04
Loss = 6.7151e-02, PNorm = 141.7585, GNorm = 0.4208, lr_0 = 6.8687e-04
Loss = 6.8985e-02, PNorm = 141.8293, GNorm = 0.5047, lr_0 = 6.8640e-04
Loss = 5.9881e-02, PNorm = 141.8942, GNorm = 0.9102, lr_0 = 6.8593e-04
Loss = 6.3801e-02, PNorm = 141.9613, GNorm = 0.4248, lr_0 = 6.8546e-04
Loss = 8.8645e-02, PNorm = 142.0246, GNorm = 0.5438, lr_0 = 6.8499e-04
Loss = 7.4039e-02, PNorm = 142.0906, GNorm = 0.5428, lr_0 = 6.8452e-04
Loss = 5.1981e-02, PNorm = 142.1618, GNorm = 0.8528, lr_0 = 6.8405e-04
Loss = 6.6202e-02, PNorm = 142.2208, GNorm = 0.6156, lr_0 = 6.8358e-04
Loss = 6.5166e-02, PNorm = 142.2959, GNorm = 0.4163, lr_0 = 6.8312e-04
Loss = 6.2073e-02, PNorm = 142.3513, GNorm = 0.5632, lr_0 = 6.8265e-04
Loss = 6.8916e-02, PNorm = 142.4153, GNorm = 0.9050, lr_0 = 6.8218e-04
Loss = 6.8919e-02, PNorm = 142.4870, GNorm = 0.7220, lr_0 = 6.8171e-04
Loss = 6.5891e-02, PNorm = 142.5536, GNorm = 0.6313, lr_0 = 6.8125e-04
Loss = 8.6003e-02, PNorm = 142.6211, GNorm = 1.0744, lr_0 = 6.8078e-04
Loss = 6.7340e-02, PNorm = 142.6916, GNorm = 0.9975, lr_0 = 6.8031e-04
Loss = 8.8669e-02, PNorm = 142.7648, GNorm = 0.9280, lr_0 = 6.7985e-04
Loss = 6.8822e-02, PNorm = 142.8328, GNorm = 0.5719, lr_0 = 6.7938e-04
Loss = 8.6411e-02, PNorm = 142.9025, GNorm = 0.6295, lr_0 = 6.7892e-04
Loss = 7.2581e-02, PNorm = 142.9764, GNorm = 0.4135, lr_0 = 6.7845e-04
Loss = 6.7281e-02, PNorm = 143.0520, GNorm = 0.4866, lr_0 = 6.7799e-04
Loss = 6.6742e-02, PNorm = 143.1307, GNorm = 1.0836, lr_0 = 6.7752e-04
Loss = 6.4490e-02, PNorm = 143.1949, GNorm = 0.3631, lr_0 = 6.7706e-04
Loss = 6.4911e-02, PNorm = 143.2598, GNorm = 1.2221, lr_0 = 6.7659e-04
Loss = 6.6414e-02, PNorm = 143.3201, GNorm = 0.3971, lr_0 = 6.7613e-04
Loss = 7.8748e-02, PNorm = 143.3823, GNorm = 0.5471, lr_0 = 6.7567e-04
Loss = 6.2185e-02, PNorm = 143.4503, GNorm = 0.5067, lr_0 = 6.7520e-04
Loss = 6.4406e-02, PNorm = 143.5143, GNorm = 0.6190, lr_0 = 6.7474e-04
Loss = 6.4553e-02, PNorm = 143.5837, GNorm = 0.5211, lr_0 = 6.7428e-04
Loss = 5.8063e-02, PNorm = 143.6486, GNorm = 0.9204, lr_0 = 6.7382e-04
Loss = 5.7729e-02, PNorm = 143.7143, GNorm = 0.3119, lr_0 = 6.7335e-04
Loss = 5.8851e-02, PNorm = 143.7748, GNorm = 0.4974, lr_0 = 6.7289e-04
Loss = 6.8472e-02, PNorm = 143.8368, GNorm = 0.9457, lr_0 = 6.7243e-04
Loss = 6.8530e-02, PNorm = 143.9060, GNorm = 0.4894, lr_0 = 6.7197e-04
Loss = 7.6623e-02, PNorm = 143.9725, GNorm = 0.6601, lr_0 = 6.7151e-04
Loss = 8.2721e-02, PNorm = 144.0548, GNorm = 0.6603, lr_0 = 6.7105e-04
Loss = 7.2889e-02, PNorm = 144.1310, GNorm = 1.3455, lr_0 = 6.7059e-04
Loss = 6.0557e-02, PNorm = 144.2046, GNorm = 0.7803, lr_0 = 6.7013e-04
Loss = 6.6735e-02, PNorm = 144.2767, GNorm = 0.5465, lr_0 = 6.6967e-04
Loss = 6.8138e-02, PNorm = 144.3437, GNorm = 0.6440, lr_0 = 6.6921e-04
Loss = 6.6581e-02, PNorm = 144.4153, GNorm = 0.5913, lr_0 = 6.6876e-04
Loss = 7.1799e-02, PNorm = 144.4894, GNorm = 0.6809, lr_0 = 6.6830e-04
Loss = 7.3378e-02, PNorm = 144.5679, GNorm = 0.3930, lr_0 = 6.6784e-04
Loss = 7.6163e-02, PNorm = 144.6452, GNorm = 0.4763, lr_0 = 6.6738e-04
Loss = 6.3652e-02, PNorm = 144.7271, GNorm = 0.6528, lr_0 = 6.6693e-04
Loss = 6.9746e-02, PNorm = 144.7961, GNorm = 0.4562, lr_0 = 6.6647e-04
Loss = 6.9234e-02, PNorm = 144.8712, GNorm = 0.5247, lr_0 = 6.6601e-04
Loss = 7.2388e-02, PNorm = 144.9489, GNorm = 0.3633, lr_0 = 6.6556e-04
Loss = 6.9689e-02, PNorm = 145.0278, GNorm = 0.5431, lr_0 = 6.6510e-04
Loss = 7.2539e-02, PNorm = 145.1148, GNorm = 0.6156, lr_0 = 6.6464e-04
Loss = 7.6957e-02, PNorm = 145.1906, GNorm = 0.6289, lr_0 = 6.6419e-04
Loss = 6.9801e-02, PNorm = 145.2703, GNorm = 0.6798, lr_0 = 6.6373e-04
Loss = 7.6485e-02, PNorm = 145.3526, GNorm = 0.6429, lr_0 = 6.6328e-04
Loss = 8.6392e-02, PNorm = 145.4194, GNorm = 0.3883, lr_0 = 6.6282e-04
Validation mae = 0.123370
Epoch 7
Loss = 5.3022e-02, PNorm = 145.4766, GNorm = 0.5497, lr_0 = 6.6237e-04
Loss = 5.5573e-02, PNorm = 145.5281, GNorm = 0.6432, lr_0 = 6.6192e-04
Loss = 6.8018e-02, PNorm = 145.5733, GNorm = 0.6948, lr_0 = 6.6146e-04
Loss = 4.8261e-02, PNorm = 145.6187, GNorm = 0.5796, lr_0 = 6.6101e-04
Loss = 4.8198e-02, PNorm = 145.6644, GNorm = 0.3324, lr_0 = 6.6056e-04
Loss = 5.8517e-02, PNorm = 145.7100, GNorm = 0.5439, lr_0 = 6.6011e-04
Loss = 6.6251e-02, PNorm = 145.7557, GNorm = 0.4458, lr_0 = 6.5965e-04
Loss = 5.6178e-02, PNorm = 145.8102, GNorm = 0.5964, lr_0 = 6.5920e-04
Loss = 6.2607e-02, PNorm = 145.8650, GNorm = 0.5047, lr_0 = 6.5875e-04
Loss = 4.1689e-02, PNorm = 145.9214, GNorm = 0.6201, lr_0 = 6.5830e-04
Loss = 5.3826e-02, PNorm = 145.9700, GNorm = 0.3080, lr_0 = 6.5785e-04
Loss = 4.8993e-02, PNorm = 146.0191, GNorm = 0.5068, lr_0 = 6.5740e-04
Loss = 5.8630e-02, PNorm = 146.0656, GNorm = 0.4524, lr_0 = 6.5695e-04
Loss = 5.1532e-02, PNorm = 146.1125, GNorm = 0.4814, lr_0 = 6.5650e-04
Loss = 5.7539e-02, PNorm = 146.1570, GNorm = 0.5467, lr_0 = 6.5605e-04
Loss = 4.4462e-02, PNorm = 146.2037, GNorm = 0.4630, lr_0 = 6.5560e-04
Loss = 5.7760e-02, PNorm = 146.2488, GNorm = 0.9252, lr_0 = 6.5515e-04
Loss = 5.0332e-02, PNorm = 146.2960, GNorm = 0.7826, lr_0 = 6.5470e-04
Loss = 4.1024e-02, PNorm = 146.3415, GNorm = 0.3277, lr_0 = 6.5425e-04
Loss = 4.7518e-02, PNorm = 146.3915, GNorm = 0.4343, lr_0 = 6.5380e-04
Loss = 4.2123e-02, PNorm = 146.4367, GNorm = 0.4422, lr_0 = 6.5335e-04
Loss = 6.2972e-02, PNorm = 146.4756, GNorm = 2.7028, lr_0 = 6.5291e-04
Loss = 5.8985e-02, PNorm = 146.5215, GNorm = 0.5086, lr_0 = 6.5246e-04
Loss = 4.9198e-02, PNorm = 146.5628, GNorm = 0.7025, lr_0 = 6.5201e-04
Loss = 4.8894e-02, PNorm = 146.6096, GNorm = 0.8421, lr_0 = 6.5157e-04
Loss = 3.9906e-02, PNorm = 146.6542, GNorm = 0.5583, lr_0 = 6.5112e-04
Loss = 4.2192e-02, PNorm = 146.6973, GNorm = 0.4353, lr_0 = 6.5067e-04
Loss = 5.3711e-02, PNorm = 146.7394, GNorm = 0.3570, lr_0 = 6.5023e-04
Loss = 3.6777e-02, PNorm = 146.7850, GNorm = 0.5260, lr_0 = 6.4978e-04
Loss = 4.4896e-02, PNorm = 146.8356, GNorm = 0.5675, lr_0 = 6.4934e-04
Loss = 4.4531e-02, PNorm = 146.8844, GNorm = 0.4000, lr_0 = 6.4889e-04
Loss = 5.1090e-02, PNorm = 146.9279, GNorm = 0.9660, lr_0 = 6.4845e-04
Loss = 4.9574e-02, PNorm = 146.9742, GNorm = 0.4308, lr_0 = 6.4800e-04
Loss = 4.3361e-02, PNorm = 147.0216, GNorm = 0.4230, lr_0 = 6.4756e-04
Loss = 4.4641e-02, PNorm = 147.0703, GNorm = 0.6728, lr_0 = 6.4712e-04
Loss = 4.9472e-02, PNorm = 147.1158, GNorm = 0.5744, lr_0 = 6.4667e-04
Loss = 4.3817e-02, PNorm = 147.1659, GNorm = 0.5384, lr_0 = 6.4623e-04
Loss = 4.8356e-02, PNorm = 147.2157, GNorm = 0.5113, lr_0 = 6.4579e-04
Loss = 5.1342e-02, PNorm = 147.2681, GNorm = 0.6020, lr_0 = 6.4534e-04
Loss = 5.4777e-02, PNorm = 147.3162, GNorm = 1.3593, lr_0 = 6.4490e-04
Loss = 4.9955e-02, PNorm = 147.3623, GNorm = 0.5751, lr_0 = 6.4446e-04
Loss = 4.8024e-02, PNorm = 147.4128, GNorm = 0.5000, lr_0 = 6.4402e-04
Loss = 4.3244e-02, PNorm = 147.4664, GNorm = 0.3467, lr_0 = 6.4358e-04
Loss = 4.3543e-02, PNorm = 147.5196, GNorm = 0.3964, lr_0 = 6.4314e-04
Loss = 4.1727e-02, PNorm = 147.5672, GNorm = 0.4943, lr_0 = 6.4270e-04
Loss = 5.1871e-02, PNorm = 147.6136, GNorm = 1.1066, lr_0 = 6.4226e-04
Loss = 5.3271e-02, PNorm = 147.6670, GNorm = 0.6023, lr_0 = 6.4182e-04
Loss = 3.8442e-02, PNorm = 147.7173, GNorm = 0.6051, lr_0 = 6.4138e-04
Loss = 4.7322e-02, PNorm = 147.7678, GNorm = 0.5399, lr_0 = 6.4094e-04
Loss = 5.1134e-02, PNorm = 147.8181, GNorm = 0.4588, lr_0 = 6.4050e-04
Loss = 4.8331e-02, PNorm = 147.8691, GNorm = 0.5848, lr_0 = 6.4006e-04
Loss = 4.6352e-02, PNorm = 147.9179, GNorm = 0.6150, lr_0 = 6.3962e-04
Loss = 5.7120e-02, PNorm = 147.9636, GNorm = 1.0388, lr_0 = 6.3918e-04
Loss = 4.6526e-02, PNorm = 148.0188, GNorm = 0.3822, lr_0 = 6.3874e-04
Loss = 5.1739e-02, PNorm = 148.0670, GNorm = 0.6066, lr_0 = 6.3831e-04
Loss = 4.2804e-02, PNorm = 148.1173, GNorm = 0.5229, lr_0 = 6.3787e-04
Loss = 4.4881e-02, PNorm = 148.1706, GNorm = 0.3921, lr_0 = 6.3743e-04
Loss = 4.9822e-02, PNorm = 148.2143, GNorm = 0.4241, lr_0 = 6.3700e-04
Loss = 6.1199e-02, PNorm = 148.2655, GNorm = 0.4910, lr_0 = 6.3656e-04
Loss = 4.8431e-02, PNorm = 148.3184, GNorm = 0.5534, lr_0 = 6.3612e-04
Loss = 4.4391e-02, PNorm = 148.3683, GNorm = 0.3375, lr_0 = 6.3569e-04
Loss = 4.8293e-02, PNorm = 148.4178, GNorm = 0.5933, lr_0 = 6.3525e-04
Loss = 5.1169e-02, PNorm = 148.4660, GNorm = 0.7217, lr_0 = 6.3482e-04
Loss = 5.4645e-02, PNorm = 148.5208, GNorm = 0.5124, lr_0 = 6.3438e-04
Loss = 4.4258e-02, PNorm = 148.5725, GNorm = 0.5902, lr_0 = 6.3395e-04
Loss = 4.8415e-02, PNorm = 148.6246, GNorm = 0.4783, lr_0 = 6.3351e-04
Loss = 5.8677e-02, PNorm = 148.6763, GNorm = 0.4685, lr_0 = 6.3308e-04
Loss = 4.9646e-02, PNorm = 148.7353, GNorm = 1.1219, lr_0 = 6.3265e-04
Loss = 5.3276e-02, PNorm = 148.7896, GNorm = 0.4087, lr_0 = 6.3221e-04
Loss = 5.3671e-02, PNorm = 148.8490, GNorm = 0.7063, lr_0 = 6.3178e-04
Loss = 4.7669e-02, PNorm = 148.9076, GNorm = 0.5065, lr_0 = 6.3135e-04
Loss = 5.8779e-02, PNorm = 148.9669, GNorm = 0.3940, lr_0 = 6.3091e-04
Loss = 5.3521e-02, PNorm = 149.0274, GNorm = 0.5549, lr_0 = 6.3048e-04
Loss = 4.2656e-02, PNorm = 149.0810, GNorm = 0.6025, lr_0 = 6.3005e-04
Loss = 4.9984e-02, PNorm = 149.1314, GNorm = 0.6717, lr_0 = 6.2962e-04
Loss = 5.4075e-02, PNorm = 149.1876, GNorm = 0.6742, lr_0 = 6.2919e-04
Loss = 5.1234e-02, PNorm = 149.2428, GNorm = 1.0161, lr_0 = 6.2876e-04
Loss = 4.7783e-02, PNorm = 149.2949, GNorm = 0.6095, lr_0 = 6.2833e-04
Loss = 4.4687e-02, PNorm = 149.3499, GNorm = 0.3995, lr_0 = 6.2789e-04
Loss = 6.7774e-02, PNorm = 149.3975, GNorm = 0.5066, lr_0 = 6.2746e-04
Loss = 5.5229e-02, PNorm = 149.4531, GNorm = 0.4647, lr_0 = 6.2703e-04
Loss = 4.6515e-02, PNorm = 149.5125, GNorm = 0.3441, lr_0 = 6.2661e-04
Loss = 5.4388e-02, PNorm = 149.5622, GNorm = 0.2972, lr_0 = 6.2618e-04
Loss = 5.6215e-02, PNorm = 149.6224, GNorm = 0.3926, lr_0 = 6.2575e-04
Loss = 4.6993e-02, PNorm = 149.6810, GNorm = 0.3444, lr_0 = 6.2532e-04
Loss = 5.1014e-02, PNorm = 149.7314, GNorm = 0.5572, lr_0 = 6.2489e-04
Loss = 6.2804e-02, PNorm = 149.7893, GNorm = 0.5763, lr_0 = 6.2446e-04
Loss = 4.8057e-02, PNorm = 149.8503, GNorm = 0.3565, lr_0 = 6.2403e-04
Loss = 6.8415e-02, PNorm = 149.9136, GNorm = 0.4270, lr_0 = 6.2361e-04
Loss = 5.1790e-02, PNorm = 149.9736, GNorm = 0.6291, lr_0 = 6.2318e-04
Loss = 5.1190e-02, PNorm = 150.0333, GNorm = 0.6393, lr_0 = 6.2275e-04
Loss = 4.9219e-02, PNorm = 150.0918, GNorm = 0.5023, lr_0 = 6.2233e-04
Loss = 4.6633e-02, PNorm = 150.1486, GNorm = 0.6012, lr_0 = 6.2190e-04
Loss = 6.5600e-02, PNorm = 150.2068, GNorm = 0.5218, lr_0 = 6.2147e-04
Loss = 6.6074e-02, PNorm = 150.2714, GNorm = 0.7915, lr_0 = 6.2105e-04
Loss = 5.0119e-02, PNorm = 150.3313, GNorm = 0.4454, lr_0 = 6.2062e-04
Loss = 4.5761e-02, PNorm = 150.3892, GNorm = 0.4630, lr_0 = 6.2020e-04
Loss = 5.3992e-02, PNorm = 150.4423, GNorm = 0.9602, lr_0 = 6.1977e-04
Loss = 5.1514e-02, PNorm = 150.4955, GNorm = 0.3748, lr_0 = 6.1935e-04
Loss = 5.2585e-02, PNorm = 150.5560, GNorm = 0.6793, lr_0 = 6.1892e-04
Loss = 5.4107e-02, PNorm = 150.6112, GNorm = 0.5274, lr_0 = 6.1850e-04
Loss = 5.7908e-02, PNorm = 150.6710, GNorm = 0.6913, lr_0 = 6.1808e-04
Loss = 5.4291e-02, PNorm = 150.7315, GNorm = 0.3711, lr_0 = 6.1765e-04
Loss = 4.6996e-02, PNorm = 150.7882, GNorm = 0.3017, lr_0 = 6.1723e-04
Loss = 7.9137e-02, PNorm = 150.8523, GNorm = 0.8522, lr_0 = 6.1681e-04
Loss = 5.1864e-02, PNorm = 150.9193, GNorm = 0.5590, lr_0 = 6.1638e-04
Loss = 5.6642e-02, PNorm = 150.9828, GNorm = 0.5598, lr_0 = 6.1596e-04
Loss = 5.0354e-02, PNorm = 151.0460, GNorm = 0.6309, lr_0 = 6.1554e-04
Loss = 5.2112e-02, PNorm = 151.1021, GNorm = 0.4676, lr_0 = 6.1512e-04
Loss = 4.1723e-02, PNorm = 151.1562, GNorm = 0.5076, lr_0 = 6.1470e-04
Loss = 5.7028e-02, PNorm = 151.2170, GNorm = 0.6089, lr_0 = 6.1428e-04
Loss = 5.6220e-02, PNorm = 151.2782, GNorm = 0.6898, lr_0 = 6.1385e-04
Loss = 5.7340e-02, PNorm = 151.3396, GNorm = 0.3808, lr_0 = 6.1343e-04
Loss = 5.5559e-02, PNorm = 151.3988, GNorm = 0.5313, lr_0 = 6.1301e-04
Loss = 5.5297e-02, PNorm = 151.4565, GNorm = 0.5572, lr_0 = 6.1259e-04
Loss = 4.7512e-02, PNorm = 151.5101, GNorm = 0.7286, lr_0 = 6.1217e-04
Loss = 5.7422e-02, PNorm = 151.5709, GNorm = 0.3915, lr_0 = 6.1175e-04
Loss = 5.0483e-02, PNorm = 151.6258, GNorm = 0.4451, lr_0 = 6.1134e-04
Loss = 4.5769e-02, PNorm = 151.6823, GNorm = 0.6673, lr_0 = 6.1092e-04
Loss = 4.6998e-02, PNorm = 151.7412, GNorm = 0.3695, lr_0 = 6.1050e-04
Validation mae = 0.123637
Epoch 8
Loss = 3.8427e-02, PNorm = 151.7819, GNorm = 0.2943, lr_0 = 6.1008e-04
Loss = 4.3097e-02, PNorm = 151.8209, GNorm = 0.4420, lr_0 = 6.0966e-04
Loss = 4.4446e-02, PNorm = 151.8566, GNorm = 0.4248, lr_0 = 6.0924e-04
Loss = 4.5883e-02, PNorm = 151.8901, GNorm = 0.3367, lr_0 = 6.0883e-04
Loss = 4.1116e-02, PNorm = 151.9253, GNorm = 0.4559, lr_0 = 6.0841e-04
Loss = 4.1177e-02, PNorm = 151.9679, GNorm = 0.3557, lr_0 = 6.0799e-04
Loss = 3.5967e-02, PNorm = 152.0057, GNorm = 0.3016, lr_0 = 6.0758e-04
Loss = 3.4617e-02, PNorm = 152.0429, GNorm = 0.2353, lr_0 = 6.0716e-04
Loss = 3.4287e-02, PNorm = 152.0786, GNorm = 0.4415, lr_0 = 6.0674e-04
Loss = 3.8967e-02, PNorm = 152.1166, GNorm = 0.3617, lr_0 = 6.0633e-04
Loss = 3.6698e-02, PNorm = 152.1531, GNorm = 0.3848, lr_0 = 6.0591e-04
Loss = 3.9325e-02, PNorm = 152.1830, GNorm = 0.4375, lr_0 = 6.0550e-04
Loss = 4.0204e-02, PNorm = 152.2193, GNorm = 0.4517, lr_0 = 6.0508e-04
Loss = 3.1962e-02, PNorm = 152.2542, GNorm = 0.7337, lr_0 = 6.0467e-04
Loss = 3.9686e-02, PNorm = 152.2932, GNorm = 0.4965, lr_0 = 6.0425e-04
Loss = 3.7703e-02, PNorm = 152.3306, GNorm = 0.5388, lr_0 = 6.0384e-04
Loss = 4.6500e-02, PNorm = 152.3762, GNorm = 0.3970, lr_0 = 6.0343e-04
Loss = 3.5145e-02, PNorm = 152.4195, GNorm = 0.5903, lr_0 = 6.0301e-04
Loss = 4.7386e-02, PNorm = 152.4643, GNorm = 0.3797, lr_0 = 6.0260e-04
Loss = 3.4872e-02, PNorm = 152.5064, GNorm = 0.2925, lr_0 = 6.0219e-04
Loss = 4.2553e-02, PNorm = 152.5505, GNorm = 0.4335, lr_0 = 6.0178e-04
Loss = 4.1084e-02, PNorm = 152.5888, GNorm = 0.2723, lr_0 = 6.0136e-04
Loss = 4.4381e-02, PNorm = 152.6309, GNorm = 0.2517, lr_0 = 6.0095e-04
Loss = 3.9698e-02, PNorm = 152.6717, GNorm = 0.3238, lr_0 = 6.0054e-04
Loss = 4.2151e-02, PNorm = 152.7134, GNorm = 0.4676, lr_0 = 6.0013e-04
Loss = 4.6752e-02, PNorm = 152.7515, GNorm = 0.5322, lr_0 = 5.9972e-04
Loss = 3.6012e-02, PNorm = 152.7966, GNorm = 0.3625, lr_0 = 5.9931e-04
Loss = 3.7669e-02, PNorm = 152.8390, GNorm = 0.3103, lr_0 = 5.9890e-04
Loss = 4.0039e-02, PNorm = 152.8827, GNorm = 0.3405, lr_0 = 5.9849e-04
Loss = 3.6234e-02, PNorm = 152.9239, GNorm = 0.3682, lr_0 = 5.9808e-04
Loss = 3.2941e-02, PNorm = 152.9652, GNorm = 0.5879, lr_0 = 5.9767e-04
Loss = 3.3689e-02, PNorm = 153.0032, GNorm = 0.6075, lr_0 = 5.9726e-04
Loss = 4.5845e-02, PNorm = 153.0414, GNorm = 0.6411, lr_0 = 5.9685e-04
Loss = 5.6787e-02, PNorm = 153.0857, GNorm = 0.7744, lr_0 = 5.9644e-04
Loss = 3.8561e-02, PNorm = 153.1290, GNorm = 0.2686, lr_0 = 5.9603e-04
Loss = 4.3064e-02, PNorm = 153.1750, GNorm = 0.3544, lr_0 = 5.9562e-04
Loss = 4.5276e-02, PNorm = 153.2234, GNorm = 0.7005, lr_0 = 5.9521e-04
Loss = 4.9043e-02, PNorm = 153.2701, GNorm = 0.8639, lr_0 = 5.9481e-04
Loss = 4.0563e-02, PNorm = 153.3164, GNorm = 0.8651, lr_0 = 5.9440e-04
Loss = 3.5484e-02, PNorm = 153.3631, GNorm = 0.4567, lr_0 = 5.9399e-04
Loss = 3.1760e-02, PNorm = 153.4059, GNorm = 0.3122, lr_0 = 5.9358e-04
Loss = 4.3793e-02, PNorm = 153.4482, GNorm = 0.4368, lr_0 = 5.9318e-04
Loss = 3.6173e-02, PNorm = 153.4875, GNorm = 0.3838, lr_0 = 5.9277e-04
Loss = 3.2672e-02, PNorm = 153.5293, GNorm = 0.4507, lr_0 = 5.9236e-04
Loss = 5.0322e-02, PNorm = 153.5618, GNorm = 0.6905, lr_0 = 5.9196e-04
Loss = 4.7907e-02, PNorm = 153.6024, GNorm = 0.4835, lr_0 = 5.9155e-04
Loss = 3.8955e-02, PNorm = 153.6419, GNorm = 0.4998, lr_0 = 5.9115e-04
Loss = 4.2465e-02, PNorm = 153.6842, GNorm = 0.4655, lr_0 = 5.9074e-04
Loss = 3.7106e-02, PNorm = 153.7343, GNorm = 0.9782, lr_0 = 5.9034e-04
Loss = 4.2992e-02, PNorm = 153.7771, GNorm = 0.5435, lr_0 = 5.8993e-04
Loss = 4.0189e-02, PNorm = 153.8228, GNorm = 0.8567, lr_0 = 5.8953e-04
Loss = 4.6191e-02, PNorm = 153.8660, GNorm = 0.4497, lr_0 = 5.8913e-04
Loss = 4.5642e-02, PNorm = 153.9073, GNorm = 0.3561, lr_0 = 5.8872e-04
Loss = 4.2811e-02, PNorm = 153.9509, GNorm = 0.5315, lr_0 = 5.8832e-04
Loss = 3.7476e-02, PNorm = 153.9997, GNorm = 0.5231, lr_0 = 5.8792e-04
Loss = 3.8562e-02, PNorm = 154.0458, GNorm = 0.4140, lr_0 = 5.8751e-04
Loss = 4.3875e-02, PNorm = 154.0954, GNorm = 0.4445, lr_0 = 5.8711e-04
Loss = 3.6009e-02, PNorm = 154.1392, GNorm = 0.7134, lr_0 = 5.8671e-04
Loss = 4.5135e-02, PNorm = 154.1851, GNorm = 0.5092, lr_0 = 5.8631e-04
Loss = 4.5733e-02, PNorm = 154.2389, GNorm = 0.5413, lr_0 = 5.8591e-04
Loss = 5.8895e-02, PNorm = 154.2938, GNorm = 0.3171, lr_0 = 5.8550e-04
Loss = 3.9160e-02, PNorm = 154.3464, GNorm = 0.6339, lr_0 = 5.8510e-04
Loss = 3.8661e-02, PNorm = 154.3951, GNorm = 0.3718, lr_0 = 5.8470e-04
Loss = 3.3853e-02, PNorm = 154.4409, GNorm = 0.5027, lr_0 = 5.8430e-04
Loss = 3.8538e-02, PNorm = 154.4819, GNorm = 0.6076, lr_0 = 5.8390e-04
Loss = 4.2635e-02, PNorm = 154.5224, GNorm = 0.6002, lr_0 = 5.8350e-04
Loss = 5.1414e-02, PNorm = 154.5646, GNorm = 0.4344, lr_0 = 5.8310e-04
Loss = 3.7454e-02, PNorm = 154.6112, GNorm = 0.7083, lr_0 = 5.8270e-04
Loss = 4.2503e-02, PNorm = 154.6644, GNorm = 0.4530, lr_0 = 5.8230e-04
Loss = 3.5867e-02, PNorm = 154.7153, GNorm = 0.4901, lr_0 = 5.8190e-04
Loss = 3.2774e-02, PNorm = 154.7618, GNorm = 0.4911, lr_0 = 5.8151e-04
Loss = 3.4379e-02, PNorm = 154.8088, GNorm = 0.5661, lr_0 = 5.8111e-04
Loss = 3.5917e-02, PNorm = 154.8518, GNorm = 0.4732, lr_0 = 5.8071e-04
Loss = 3.6410e-02, PNorm = 154.8954, GNorm = 0.4469, lr_0 = 5.8031e-04
Loss = 4.3913e-02, PNorm = 154.9455, GNorm = 0.3481, lr_0 = 5.7991e-04
Loss = 4.0580e-02, PNorm = 154.9983, GNorm = 0.2897, lr_0 = 5.7952e-04
Loss = 3.5732e-02, PNorm = 155.0545, GNorm = 0.2382, lr_0 = 5.7912e-04
Loss = 3.9221e-02, PNorm = 155.1048, GNorm = 0.4162, lr_0 = 5.7872e-04
Loss = 3.6639e-02, PNorm = 155.1549, GNorm = 0.3952, lr_0 = 5.7833e-04
Loss = 4.2878e-02, PNorm = 155.2035, GNorm = 0.3546, lr_0 = 5.7793e-04
Loss = 3.3642e-02, PNorm = 155.2548, GNorm = 0.4193, lr_0 = 5.7753e-04
Loss = 4.0345e-02, PNorm = 155.3053, GNorm = 0.5479, lr_0 = 5.7714e-04
Loss = 4.0457e-02, PNorm = 155.3521, GNorm = 0.7594, lr_0 = 5.7674e-04
Loss = 4.3094e-02, PNorm = 155.4057, GNorm = 0.3850, lr_0 = 5.7635e-04
Loss = 4.9751e-02, PNorm = 155.4600, GNorm = 0.5943, lr_0 = 5.7595e-04
Loss = 4.8968e-02, PNorm = 155.5186, GNorm = 0.4220, lr_0 = 5.7556e-04
Loss = 3.6344e-02, PNorm = 155.5726, GNorm = 0.4823, lr_0 = 5.7516e-04
Loss = 4.2361e-02, PNorm = 155.6190, GNorm = 0.6048, lr_0 = 5.7477e-04
Loss = 4.5333e-02, PNorm = 155.6678, GNorm = 0.5416, lr_0 = 5.7438e-04
Loss = 4.9205e-02, PNorm = 155.7215, GNorm = 0.5791, lr_0 = 5.7398e-04
Loss = 3.8623e-02, PNorm = 155.7759, GNorm = 0.3912, lr_0 = 5.7359e-04
Loss = 4.0671e-02, PNorm = 155.8298, GNorm = 0.3871, lr_0 = 5.7320e-04
Loss = 3.8811e-02, PNorm = 155.8807, GNorm = 0.5709, lr_0 = 5.7280e-04
Loss = 4.9472e-02, PNorm = 155.9344, GNorm = 0.4161, lr_0 = 5.7241e-04
Loss = 4.5122e-02, PNorm = 155.9953, GNorm = 0.9117, lr_0 = 5.7202e-04
Loss = 4.8035e-02, PNorm = 156.0519, GNorm = 0.5574, lr_0 = 5.7163e-04
Loss = 4.7762e-02, PNorm = 156.1014, GNorm = 0.5995, lr_0 = 5.7124e-04
Loss = 4.4133e-02, PNorm = 156.1493, GNorm = 1.2464, lr_0 = 5.7084e-04
Loss = 5.5444e-02, PNorm = 156.2039, GNorm = 0.5142, lr_0 = 5.7045e-04
Loss = 4.6595e-02, PNorm = 156.2623, GNorm = 0.8347, lr_0 = 5.7006e-04
Loss = 4.7453e-02, PNorm = 156.3216, GNorm = 0.3638, lr_0 = 5.6967e-04
Loss = 3.9317e-02, PNorm = 156.3767, GNorm = 0.4728, lr_0 = 5.6928e-04
Loss = 4.2473e-02, PNorm = 156.4300, GNorm = 0.5690, lr_0 = 5.6889e-04
Loss = 4.0605e-02, PNorm = 156.4829, GNorm = 0.6916, lr_0 = 5.6850e-04
Loss = 4.2727e-02, PNorm = 156.5307, GNorm = 0.7614, lr_0 = 5.6811e-04
Loss = 5.9239e-02, PNorm = 156.5830, GNorm = 0.6196, lr_0 = 5.6772e-04
Loss = 3.7175e-02, PNorm = 156.6356, GNorm = 0.5317, lr_0 = 5.6733e-04
Loss = 5.4581e-02, PNorm = 156.6920, GNorm = 0.5846, lr_0 = 5.6695e-04
Loss = 3.4935e-02, PNorm = 156.7469, GNorm = 0.3627, lr_0 = 5.6656e-04
Loss = 4.3875e-02, PNorm = 156.8002, GNorm = 0.5365, lr_0 = 5.6617e-04
Loss = 3.7779e-02, PNorm = 156.8500, GNorm = 0.5157, lr_0 = 5.6578e-04
Loss = 4.4744e-02, PNorm = 156.9000, GNorm = 0.7409, lr_0 = 5.6539e-04
Loss = 4.3873e-02, PNorm = 156.9521, GNorm = 0.8730, lr_0 = 5.6501e-04
Loss = 4.3049e-02, PNorm = 157.0076, GNorm = 0.6940, lr_0 = 5.6462e-04
Loss = 3.9487e-02, PNorm = 157.0578, GNorm = 0.3956, lr_0 = 5.6423e-04
Loss = 3.9410e-02, PNorm = 157.1097, GNorm = 0.8058, lr_0 = 5.6385e-04
Loss = 3.5967e-02, PNorm = 157.1603, GNorm = 0.4915, lr_0 = 5.6346e-04
Loss = 4.3458e-02, PNorm = 157.2115, GNorm = 0.4326, lr_0 = 5.6307e-04
Loss = 5.1471e-02, PNorm = 157.2696, GNorm = 0.6886, lr_0 = 5.6269e-04
Loss = 3.8860e-02, PNorm = 157.3300, GNorm = 0.3501, lr_0 = 5.6230e-04
Validation mae = 0.124567
Epoch 9
Loss = 3.3592e-02, PNorm = 157.3730, GNorm = 0.2668, lr_0 = 5.6192e-04
Loss = 3.6409e-02, PNorm = 157.4096, GNorm = 0.3761, lr_0 = 5.6153e-04
Loss = 3.3380e-02, PNorm = 157.4494, GNorm = 0.4774, lr_0 = 5.6115e-04
Loss = 4.7978e-02, PNorm = 157.4905, GNorm = 0.3516, lr_0 = 5.6076e-04
Loss = 3.5756e-02, PNorm = 157.5282, GNorm = 0.4794, lr_0 = 5.6038e-04
Loss = 3.4207e-02, PNorm = 157.5633, GNorm = 0.5764, lr_0 = 5.6000e-04
Loss = 4.3891e-02, PNorm = 157.5983, GNorm = 0.4084, lr_0 = 5.5961e-04
Loss = 3.3195e-02, PNorm = 157.6337, GNorm = 0.2940, lr_0 = 5.5923e-04
Loss = 3.6413e-02, PNorm = 157.6632, GNorm = 0.7336, lr_0 = 5.5885e-04
Loss = 4.6950e-02, PNorm = 157.6983, GNorm = 0.3040, lr_0 = 5.5846e-04
Loss = 3.8498e-02, PNorm = 157.7343, GNorm = 0.3915, lr_0 = 5.5808e-04
Loss = 3.4038e-02, PNorm = 157.7720, GNorm = 0.3078, lr_0 = 5.5770e-04
Loss = 3.9424e-02, PNorm = 157.8086, GNorm = 0.4677, lr_0 = 5.5732e-04
Loss = 3.6305e-02, PNorm = 157.8414, GNorm = 0.3177, lr_0 = 5.5693e-04
Loss = 3.3928e-02, PNorm = 157.8754, GNorm = 0.5127, lr_0 = 5.5655e-04
Loss = 3.0614e-02, PNorm = 157.9101, GNorm = 0.3604, lr_0 = 5.5617e-04
Loss = 2.9456e-02, PNorm = 157.9478, GNorm = 0.3653, lr_0 = 5.5579e-04
Loss = 2.8826e-02, PNorm = 157.9811, GNorm = 0.3661, lr_0 = 5.5541e-04
Loss = 3.4575e-02, PNorm = 158.0142, GNorm = 1.1124, lr_0 = 5.5503e-04
Loss = 3.0487e-02, PNorm = 158.0514, GNorm = 0.5660, lr_0 = 5.5465e-04
Loss = 2.9111e-02, PNorm = 158.0835, GNorm = 0.4664, lr_0 = 5.5427e-04
Loss = 2.9431e-02, PNorm = 158.1149, GNorm = 0.2779, lr_0 = 5.5389e-04
Loss = 2.8307e-02, PNorm = 158.1487, GNorm = 0.3110, lr_0 = 5.5351e-04
Loss = 3.0875e-02, PNorm = 158.1850, GNorm = 0.3422, lr_0 = 5.5313e-04
Loss = 3.0514e-02, PNorm = 158.2183, GNorm = 0.4620, lr_0 = 5.5275e-04
Loss = 3.3921e-02, PNorm = 158.2572, GNorm = 0.2600, lr_0 = 5.5237e-04
Loss = 3.4339e-02, PNorm = 158.2945, GNorm = 0.3767, lr_0 = 5.5199e-04
Loss = 2.8809e-02, PNorm = 158.3290, GNorm = 0.4817, lr_0 = 5.5162e-04
Loss = 3.6924e-02, PNorm = 158.3663, GNorm = 0.3674, lr_0 = 5.5124e-04
Loss = 2.6834e-02, PNorm = 158.4003, GNorm = 0.5379, lr_0 = 5.5086e-04
Loss = 3.9621e-02, PNorm = 158.4304, GNorm = 0.3915, lr_0 = 5.5048e-04
Loss = 3.4284e-02, PNorm = 158.4627, GNorm = 0.4824, lr_0 = 5.5011e-04
Loss = 3.2037e-02, PNorm = 158.4957, GNorm = 0.2742, lr_0 = 5.4973e-04
Loss = 3.2331e-02, PNorm = 158.5326, GNorm = 0.3539, lr_0 = 5.4935e-04
Loss = 4.1493e-02, PNorm = 158.5667, GNorm = 0.2744, lr_0 = 5.4898e-04
Loss = 3.7914e-02, PNorm = 158.6064, GNorm = 0.4493, lr_0 = 5.4860e-04
Loss = 2.8750e-02, PNorm = 158.6472, GNorm = 0.4461, lr_0 = 5.4822e-04
Loss = 3.0178e-02, PNorm = 158.6850, GNorm = 0.2379, lr_0 = 5.4785e-04
Loss = 4.4573e-02, PNorm = 158.7239, GNorm = 0.3434, lr_0 = 5.4747e-04
Loss = 3.0183e-02, PNorm = 158.7585, GNorm = 0.2953, lr_0 = 5.4710e-04
Loss = 3.4740e-02, PNorm = 158.7931, GNorm = 0.6460, lr_0 = 5.4672e-04
Loss = 3.4239e-02, PNorm = 158.8301, GNorm = 0.3160, lr_0 = 5.4635e-04
Loss = 2.9712e-02, PNorm = 158.8646, GNorm = 0.2855, lr_0 = 5.4597e-04
Loss = 2.8958e-02, PNorm = 158.8986, GNorm = 0.3966, lr_0 = 5.4560e-04
Loss = 3.6623e-02, PNorm = 158.9345, GNorm = 0.3770, lr_0 = 5.4523e-04
Loss = 4.5410e-02, PNorm = 158.9725, GNorm = 0.3093, lr_0 = 5.4485e-04
Loss = 3.0701e-02, PNorm = 159.0115, GNorm = 0.5598, lr_0 = 5.4448e-04
Loss = 2.8865e-02, PNorm = 159.0432, GNorm = 0.4460, lr_0 = 5.4411e-04
Loss = 4.1817e-02, PNorm = 159.0779, GNorm = 0.3477, lr_0 = 5.4373e-04
Loss = 3.7099e-02, PNorm = 159.1139, GNorm = 0.3283, lr_0 = 5.4336e-04
Loss = 3.1935e-02, PNorm = 159.1543, GNorm = 0.3494, lr_0 = 5.4299e-04
Loss = 2.9019e-02, PNorm = 159.1905, GNorm = 0.4163, lr_0 = 5.4262e-04
Loss = 3.0995e-02, PNorm = 159.2269, GNorm = 0.5399, lr_0 = 5.4225e-04
Loss = 3.2157e-02, PNorm = 159.2663, GNorm = 0.2817, lr_0 = 5.4187e-04
Loss = 3.1949e-02, PNorm = 159.3068, GNorm = 0.3272, lr_0 = 5.4150e-04
Loss = 2.6494e-02, PNorm = 159.3427, GNorm = 0.7401, lr_0 = 5.4113e-04
Loss = 3.3381e-02, PNorm = 159.3777, GNorm = 0.4916, lr_0 = 5.4076e-04
Loss = 3.5611e-02, PNorm = 159.4184, GNorm = 0.3322, lr_0 = 5.4039e-04
Loss = 2.7225e-02, PNorm = 159.4556, GNorm = 0.5127, lr_0 = 5.4002e-04
Loss = 3.2346e-02, PNorm = 159.4866, GNorm = 0.2390, lr_0 = 5.3965e-04
Loss = 2.8926e-02, PNorm = 159.5219, GNorm = 0.3927, lr_0 = 5.3928e-04
Loss = 2.2292e-02, PNorm = 159.5524, GNorm = 0.2257, lr_0 = 5.3891e-04
Loss = 3.0194e-02, PNorm = 159.5880, GNorm = 0.3782, lr_0 = 5.3854e-04
Loss = 2.6272e-02, PNorm = 159.6268, GNorm = 0.4024, lr_0 = 5.3817e-04
Loss = 3.0458e-02, PNorm = 159.6660, GNorm = 0.4722, lr_0 = 5.3781e-04
Loss = 2.5418e-02, PNorm = 159.6991, GNorm = 0.5249, lr_0 = 5.3744e-04
Loss = 3.1268e-02, PNorm = 159.7321, GNorm = 0.3938, lr_0 = 5.3707e-04
Loss = 3.2822e-02, PNorm = 159.7684, GNorm = 0.6702, lr_0 = 5.3670e-04
Loss = 3.6628e-02, PNorm = 159.8074, GNorm = 0.3269, lr_0 = 5.3633e-04
Loss = 3.7794e-02, PNorm = 159.8445, GNorm = 0.5954, lr_0 = 5.3597e-04
Loss = 4.0457e-02, PNorm = 159.8916, GNorm = 0.4959, lr_0 = 5.3560e-04
Loss = 3.0487e-02, PNorm = 159.9342, GNorm = 0.3543, lr_0 = 5.3523e-04
Loss = 2.9806e-02, PNorm = 159.9755, GNorm = 0.3083, lr_0 = 5.3486e-04
Loss = 3.6212e-02, PNorm = 160.0136, GNorm = 0.5104, lr_0 = 5.3450e-04
Loss = 3.2938e-02, PNorm = 160.0519, GNorm = 0.5455, lr_0 = 5.3413e-04
Loss = 3.3575e-02, PNorm = 160.0927, GNorm = 0.3525, lr_0 = 5.3377e-04
Loss = 3.5303e-02, PNorm = 160.1339, GNorm = 0.5440, lr_0 = 5.3340e-04
Loss = 2.8112e-02, PNorm = 160.1742, GNorm = 0.2982, lr_0 = 5.3304e-04
Loss = 3.3299e-02, PNorm = 160.2136, GNorm = 0.7713, lr_0 = 5.3267e-04
Loss = 3.5180e-02, PNorm = 160.2477, GNorm = 0.6289, lr_0 = 5.3231e-04
Loss = 3.9580e-02, PNorm = 160.2949, GNorm = 0.3719, lr_0 = 5.3194e-04
Loss = 3.5386e-02, PNorm = 160.3398, GNorm = 0.3282, lr_0 = 5.3158e-04
Loss = 2.9968e-02, PNorm = 160.3831, GNorm = 0.6435, lr_0 = 5.3121e-04
Loss = 2.9585e-02, PNorm = 160.4255, GNorm = 0.7113, lr_0 = 5.3085e-04
Loss = 2.9132e-02, PNorm = 160.4659, GNorm = 0.3455, lr_0 = 5.3048e-04
Loss = 3.8228e-02, PNorm = 160.5049, GNorm = 0.4951, lr_0 = 5.3012e-04
Loss = 3.0381e-02, PNorm = 160.5448, GNorm = 0.4255, lr_0 = 5.2976e-04
Loss = 3.2626e-02, PNorm = 160.5836, GNorm = 0.6360, lr_0 = 5.2939e-04
Loss = 3.4963e-02, PNorm = 160.6251, GNorm = 0.1823, lr_0 = 5.2903e-04
Loss = 3.6690e-02, PNorm = 160.6671, GNorm = 0.3636, lr_0 = 5.2867e-04
Loss = 3.9891e-02, PNorm = 160.7113, GNorm = 0.5258, lr_0 = 5.2831e-04
Loss = 4.0088e-02, PNorm = 160.7565, GNorm = 0.5913, lr_0 = 5.2795e-04
Loss = 3.0133e-02, PNorm = 160.8008, GNorm = 0.8149, lr_0 = 5.2758e-04
Loss = 2.9728e-02, PNorm = 160.8440, GNorm = 0.3804, lr_0 = 5.2722e-04
Loss = 3.6488e-02, PNorm = 160.8855, GNorm = 0.5410, lr_0 = 5.2686e-04
Loss = 2.6861e-02, PNorm = 160.9301, GNorm = 0.3118, lr_0 = 5.2650e-04
Loss = 3.1669e-02, PNorm = 160.9698, GNorm = 0.4959, lr_0 = 5.2614e-04
Loss = 3.4135e-02, PNorm = 161.0130, GNorm = 0.2884, lr_0 = 5.2578e-04
Loss = 3.1579e-02, PNorm = 161.0517, GNorm = 0.4633, lr_0 = 5.2542e-04
Loss = 4.4850e-02, PNorm = 161.0893, GNorm = 0.4605, lr_0 = 5.2506e-04
Loss = 3.2848e-02, PNorm = 161.1279, GNorm = 0.2692, lr_0 = 5.2470e-04
Loss = 3.7577e-02, PNorm = 161.1681, GNorm = 0.2396, lr_0 = 5.2434e-04
Loss = 3.3177e-02, PNorm = 161.2107, GNorm = 0.5412, lr_0 = 5.2398e-04
Loss = 3.6336e-02, PNorm = 161.2524, GNorm = 0.5075, lr_0 = 5.2362e-04
Loss = 3.5268e-02, PNorm = 161.2980, GNorm = 0.7922, lr_0 = 5.2326e-04
Loss = 3.5741e-02, PNorm = 161.3375, GNorm = 0.3221, lr_0 = 5.2290e-04
Loss = 3.9652e-02, PNorm = 161.3833, GNorm = 0.3184, lr_0 = 5.2255e-04
Loss = 4.4306e-02, PNorm = 161.4309, GNorm = 0.5564, lr_0 = 5.2219e-04
Loss = 4.0001e-02, PNorm = 161.4794, GNorm = 0.3574, lr_0 = 5.2183e-04
Loss = 4.8037e-02, PNorm = 161.5282, GNorm = 0.6729, lr_0 = 5.2147e-04
Loss = 3.7094e-02, PNorm = 161.5752, GNorm = 0.4264, lr_0 = 5.2112e-04
Loss = 3.4761e-02, PNorm = 161.6235, GNorm = 0.3790, lr_0 = 5.2076e-04
Loss = 3.2329e-02, PNorm = 161.6662, GNorm = 0.5243, lr_0 = 5.2040e-04
Loss = 2.9102e-02, PNorm = 161.7081, GNorm = 0.3821, lr_0 = 5.2005e-04
Loss = 3.6911e-02, PNorm = 161.7489, GNorm = 0.4385, lr_0 = 5.1969e-04
Loss = 2.8354e-02, PNorm = 161.7893, GNorm = 0.2948, lr_0 = 5.1933e-04
Loss = 3.6079e-02, PNorm = 161.8280, GNorm = 0.3043, lr_0 = 5.1898e-04
Loss = 3.7770e-02, PNorm = 161.8671, GNorm = 0.6161, lr_0 = 5.1862e-04
Loss = 3.8992e-02, PNorm = 161.9085, GNorm = 0.2579, lr_0 = 5.1827e-04
Loss = 3.4815e-02, PNorm = 161.9480, GNorm = 0.7251, lr_0 = 5.1791e-04
Validation mae = 0.123088
Epoch 10
Loss = 2.9905e-02, PNorm = 161.9812, GNorm = 0.3247, lr_0 = 5.1756e-04
Loss = 3.0979e-02, PNorm = 162.0164, GNorm = 0.3600, lr_0 = 5.1720e-04
Loss = 3.4597e-02, PNorm = 162.0459, GNorm = 0.8199, lr_0 = 5.1685e-04
Loss = 2.9450e-02, PNorm = 162.0742, GNorm = 0.4659, lr_0 = 5.1649e-04
Loss = 3.2308e-02, PNorm = 162.1025, GNorm = 0.4181, lr_0 = 5.1614e-04
Loss = 2.9931e-02, PNorm = 162.1267, GNorm = 0.1922, lr_0 = 5.1579e-04
Loss = 3.0948e-02, PNorm = 162.1577, GNorm = 0.5405, lr_0 = 5.1543e-04
Loss = 2.5590e-02, PNorm = 162.1920, GNorm = 0.4069, lr_0 = 5.1508e-04
Loss = 2.3987e-02, PNorm = 162.2222, GNorm = 0.2106, lr_0 = 5.1473e-04
Loss = 3.1790e-02, PNorm = 162.2490, GNorm = 0.4570, lr_0 = 5.1437e-04
Loss = 2.7950e-02, PNorm = 162.2741, GNorm = 0.2677, lr_0 = 5.1402e-04
Loss = 2.9110e-02, PNorm = 162.2991, GNorm = 0.4023, lr_0 = 5.1367e-04
Loss = 2.5153e-02, PNorm = 162.3287, GNorm = 0.6977, lr_0 = 5.1332e-04
Loss = 2.6164e-02, PNorm = 162.3563, GNorm = 0.2472, lr_0 = 5.1297e-04
Loss = 2.1067e-02, PNorm = 162.3827, GNorm = 0.6318, lr_0 = 5.1262e-04
Loss = 2.5698e-02, PNorm = 162.4092, GNorm = 0.4533, lr_0 = 5.1226e-04
Loss = 4.2201e-02, PNorm = 162.4432, GNorm = 0.2758, lr_0 = 5.1191e-04
Loss = 3.0332e-02, PNorm = 162.4704, GNorm = 0.5898, lr_0 = 5.1156e-04
Loss = 2.6335e-02, PNorm = 162.5049, GNorm = 0.2392, lr_0 = 5.1121e-04
Loss = 3.4812e-02, PNorm = 162.5359, GNorm = 0.8277, lr_0 = 5.1086e-04
Loss = 3.2117e-02, PNorm = 162.5687, GNorm = 0.5606, lr_0 = 5.1051e-04
Loss = 2.3601e-02, PNorm = 162.6022, GNorm = 0.7529, lr_0 = 5.1016e-04
Loss = 2.8651e-02, PNorm = 162.6247, GNorm = 0.4175, lr_0 = 5.0981e-04
Loss = 2.5777e-02, PNorm = 162.6513, GNorm = 0.3777, lr_0 = 5.0946e-04
Loss = 2.7345e-02, PNorm = 162.6765, GNorm = 0.3356, lr_0 = 5.0911e-04
Loss = 2.2308e-02, PNorm = 162.7027, GNorm = 0.6715, lr_0 = 5.0877e-04
Loss = 2.2533e-02, PNorm = 162.7270, GNorm = 0.2250, lr_0 = 5.0842e-04
Loss = 2.5515e-02, PNorm = 162.7523, GNorm = 0.5003, lr_0 = 5.0807e-04
Loss = 2.3930e-02, PNorm = 162.7844, GNorm = 0.5761, lr_0 = 5.0772e-04
Loss = 2.5009e-02, PNorm = 162.8109, GNorm = 0.4289, lr_0 = 5.0737e-04
Loss = 3.0678e-02, PNorm = 162.8341, GNorm = 0.5467, lr_0 = 5.0703e-04
Loss = 2.8144e-02, PNorm = 162.8638, GNorm = 0.5229, lr_0 = 5.0668e-04
Loss = 2.4754e-02, PNorm = 162.8900, GNorm = 0.4902, lr_0 = 5.0633e-04
Loss = 2.3096e-02, PNorm = 162.9189, GNorm = 0.2094, lr_0 = 5.0598e-04
Loss = 2.8610e-02, PNorm = 162.9464, GNorm = 0.3571, lr_0 = 5.0564e-04
Loss = 3.0969e-02, PNorm = 162.9782, GNorm = 0.3991, lr_0 = 5.0529e-04
Loss = 2.7386e-02, PNorm = 163.0127, GNorm = 0.7694, lr_0 = 5.0494e-04
Loss = 2.2751e-02, PNorm = 163.0457, GNorm = 0.5022, lr_0 = 5.0460e-04
Loss = 2.7320e-02, PNorm = 163.0763, GNorm = 0.4059, lr_0 = 5.0425e-04
Loss = 2.2820e-02, PNorm = 163.1069, GNorm = 0.4148, lr_0 = 5.0391e-04
Loss = 3.3378e-02, PNorm = 163.1365, GNorm = 0.3461, lr_0 = 5.0356e-04
Loss = 2.5302e-02, PNorm = 163.1718, GNorm = 0.5193, lr_0 = 5.0322e-04
Loss = 2.4411e-02, PNorm = 163.2022, GNorm = 0.2126, lr_0 = 5.0287e-04
Loss = 3.0982e-02, PNorm = 163.2330, GNorm = 0.3046, lr_0 = 5.0253e-04
Loss = 2.6961e-02, PNorm = 163.2671, GNorm = 0.3590, lr_0 = 5.0218e-04
Loss = 2.8145e-02, PNorm = 163.2980, GNorm = 0.3777, lr_0 = 5.0184e-04
Loss = 2.9850e-02, PNorm = 163.3301, GNorm = 0.3483, lr_0 = 5.0150e-04
Loss = 2.6215e-02, PNorm = 163.3658, GNorm = 0.5549, lr_0 = 5.0115e-04
Loss = 3.3624e-02, PNorm = 163.3981, GNorm = 0.2814, lr_0 = 5.0081e-04
Loss = 3.0944e-02, PNorm = 163.4331, GNorm = 0.4447, lr_0 = 5.0047e-04
Loss = 2.7986e-02, PNorm = 163.4667, GNorm = 0.5969, lr_0 = 5.0012e-04
Loss = 3.1228e-02, PNorm = 163.5000, GNorm = 0.2335, lr_0 = 4.9978e-04
Loss = 3.2831e-02, PNorm = 163.5361, GNorm = 0.2667, lr_0 = 4.9944e-04
Loss = 2.6348e-02, PNorm = 163.5682, GNorm = 0.2466, lr_0 = 4.9910e-04
Loss = 2.7955e-02, PNorm = 163.6018, GNorm = 0.5065, lr_0 = 4.9875e-04
Loss = 2.2584e-02, PNorm = 163.6328, GNorm = 0.2654, lr_0 = 4.9841e-04
Loss = 2.2622e-02, PNorm = 163.6635, GNorm = 0.3687, lr_0 = 4.9807e-04
Loss = 2.3113e-02, PNorm = 163.6932, GNorm = 0.3618, lr_0 = 4.9773e-04
Loss = 3.0893e-02, PNorm = 163.7262, GNorm = 0.4729, lr_0 = 4.9739e-04
Loss = 2.5171e-02, PNorm = 163.7605, GNorm = 0.2829, lr_0 = 4.9705e-04
Loss = 2.2619e-02, PNorm = 163.7953, GNorm = 0.1857, lr_0 = 4.9671e-04
Loss = 2.1195e-02, PNorm = 163.8295, GNorm = 0.4636, lr_0 = 4.9637e-04
Loss = 2.5948e-02, PNorm = 163.8555, GNorm = 0.4413, lr_0 = 4.9603e-04
Loss = 4.4192e-02, PNorm = 163.8836, GNorm = 0.2067, lr_0 = 4.9569e-04
Loss = 3.1242e-02, PNorm = 163.9211, GNorm = 0.5447, lr_0 = 4.9535e-04
Loss = 2.7036e-02, PNorm = 163.9540, GNorm = 0.4163, lr_0 = 4.9501e-04
Loss = 2.3163e-02, PNorm = 163.9910, GNorm = 0.2758, lr_0 = 4.9467e-04
Loss = 2.8894e-02, PNorm = 164.0313, GNorm = 0.5437, lr_0 = 4.9433e-04
Loss = 2.5393e-02, PNorm = 164.0687, GNorm = 0.3753, lr_0 = 4.9399e-04
Loss = 2.4633e-02, PNorm = 164.1037, GNorm = 0.2871, lr_0 = 4.9365e-04
Loss = 3.1664e-02, PNorm = 164.1326, GNorm = 0.5631, lr_0 = 4.9332e-04
Loss = 2.6422e-02, PNorm = 164.1700, GNorm = 0.3943, lr_0 = 4.9298e-04
Loss = 2.7262e-02, PNorm = 164.2105, GNorm = 0.3515, lr_0 = 4.9264e-04
Loss = 2.8268e-02, PNorm = 164.2440, GNorm = 0.3567, lr_0 = 4.9230e-04
Loss = 2.6531e-02, PNorm = 164.2798, GNorm = 0.2779, lr_0 = 4.9197e-04
Loss = 2.8783e-02, PNorm = 164.3150, GNorm = 0.6736, lr_0 = 4.9163e-04
Loss = 2.5910e-02, PNorm = 164.3525, GNorm = 0.2676, lr_0 = 4.9129e-04
Loss = 3.5641e-02, PNorm = 164.3921, GNorm = 0.2952, lr_0 = 4.9095e-04
Loss = 3.6891e-02, PNorm = 164.4332, GNorm = 0.5080, lr_0 = 4.9062e-04
Loss = 3.4607e-02, PNorm = 164.4716, GNorm = 0.7106, lr_0 = 4.9028e-04
Loss = 3.4680e-02, PNorm = 164.5073, GNorm = 0.2590, lr_0 = 4.8995e-04
Loss = 2.6779e-02, PNorm = 164.5447, GNorm = 0.5162, lr_0 = 4.8961e-04
Loss = 2.9392e-02, PNorm = 164.5835, GNorm = 0.3582, lr_0 = 4.8928e-04
Loss = 2.7502e-02, PNorm = 164.6149, GNorm = 0.3657, lr_0 = 4.8894e-04
Loss = 2.7350e-02, PNorm = 164.6506, GNorm = 0.3238, lr_0 = 4.8861e-04
Loss = 2.1674e-02, PNorm = 164.6842, GNorm = 0.2806, lr_0 = 4.8827e-04
Loss = 2.2511e-02, PNorm = 164.7155, GNorm = 0.4703, lr_0 = 4.8794e-04
Loss = 3.3259e-02, PNorm = 164.7472, GNorm = 0.5474, lr_0 = 4.8760e-04
Loss = 2.6234e-02, PNorm = 164.7827, GNorm = 0.2423, lr_0 = 4.8727e-04
Loss = 3.0772e-02, PNorm = 164.8113, GNorm = 0.4458, lr_0 = 4.8693e-04
Loss = 3.0290e-02, PNorm = 164.8462, GNorm = 0.5253, lr_0 = 4.8660e-04
Loss = 2.5136e-02, PNorm = 164.8814, GNorm = 0.2522, lr_0 = 4.8627e-04
Loss = 2.3179e-02, PNorm = 164.9169, GNorm = 0.3002, lr_0 = 4.8593e-04
Loss = 2.2801e-02, PNorm = 164.9535, GNorm = 0.1950, lr_0 = 4.8560e-04
Loss = 2.5960e-02, PNorm = 164.9876, GNorm = 0.6907, lr_0 = 4.8527e-04
Loss = 2.4203e-02, PNorm = 165.0180, GNorm = 0.4852, lr_0 = 4.8494e-04
Loss = 3.0695e-02, PNorm = 165.0538, GNorm = 0.6662, lr_0 = 4.8460e-04
Loss = 2.4821e-02, PNorm = 165.0856, GNorm = 0.4407, lr_0 = 4.8427e-04
Loss = 2.8999e-02, PNorm = 165.1213, GNorm = 0.4422, lr_0 = 4.8394e-04
Loss = 3.0178e-02, PNorm = 165.1609, GNorm = 0.4073, lr_0 = 4.8361e-04
Loss = 2.2368e-02, PNorm = 165.1975, GNorm = 0.7041, lr_0 = 4.8328e-04
Loss = 2.7083e-02, PNorm = 165.2300, GNorm = 0.6256, lr_0 = 4.8295e-04
Loss = 3.1790e-02, PNorm = 165.2619, GNorm = 0.4810, lr_0 = 4.8262e-04
Loss = 2.3447e-02, PNorm = 165.2954, GNorm = 0.3179, lr_0 = 4.8228e-04
Loss = 3.2103e-02, PNorm = 165.3304, GNorm = 0.4519, lr_0 = 4.8195e-04
Loss = 2.7405e-02, PNorm = 165.3636, GNorm = 0.5727, lr_0 = 4.8162e-04
Loss = 2.9387e-02, PNorm = 165.4037, GNorm = 0.2938, lr_0 = 4.8129e-04
Loss = 2.4983e-02, PNorm = 165.4397, GNorm = 0.5414, lr_0 = 4.8096e-04
Loss = 2.2109e-02, PNorm = 165.4741, GNorm = 0.3868, lr_0 = 4.8064e-04
Loss = 2.7567e-02, PNorm = 165.5067, GNorm = 0.4158, lr_0 = 4.8031e-04
Loss = 2.3979e-02, PNorm = 165.5393, GNorm = 0.3526, lr_0 = 4.7998e-04
Loss = 2.6379e-02, PNorm = 165.5744, GNorm = 0.7449, lr_0 = 4.7965e-04
Loss = 2.1687e-02, PNorm = 165.6109, GNorm = 0.2626, lr_0 = 4.7932e-04
Loss = 2.0238e-02, PNorm = 165.6457, GNorm = 0.4138, lr_0 = 4.7899e-04
Loss = 2.9798e-02, PNorm = 165.6791, GNorm = 0.3855, lr_0 = 4.7866e-04
Loss = 2.3969e-02, PNorm = 165.7115, GNorm = 0.3794, lr_0 = 4.7833e-04
Loss = 1.9510e-02, PNorm = 165.7436, GNorm = 0.3633, lr_0 = 4.7801e-04
Loss = 2.4054e-02, PNorm = 165.7765, GNorm = 0.5657, lr_0 = 4.7768e-04
Loss = 3.1206e-02, PNorm = 165.8086, GNorm = 0.6526, lr_0 = 4.7735e-04
Loss = 2.7737e-02, PNorm = 165.8486, GNorm = 0.4590, lr_0 = 4.7703e-04
Validation mae = 0.122770
Epoch 11
Loss = 2.3061e-02, PNorm = 165.8835, GNorm = 0.3170, lr_0 = 4.7670e-04
Loss = 3.7356e-02, PNorm = 165.9151, GNorm = 0.4352, lr_0 = 4.7637e-04
Loss = 2.1321e-02, PNorm = 165.9417, GNorm = 0.5036, lr_0 = 4.7605e-04
Loss = 2.4810e-02, PNorm = 165.9673, GNorm = 0.3024, lr_0 = 4.7572e-04
Loss = 2.9274e-02, PNorm = 165.9896, GNorm = 0.2666, lr_0 = 4.7539e-04
Loss = 2.3454e-02, PNorm = 166.0130, GNorm = 0.4569, lr_0 = 4.7507e-04
Loss = 2.0948e-02, PNorm = 166.0347, GNorm = 0.3542, lr_0 = 4.7474e-04
Loss = 2.3707e-02, PNorm = 166.0544, GNorm = 0.3332, lr_0 = 4.7442e-04
Loss = 2.5429e-02, PNorm = 166.0759, GNorm = 0.2916, lr_0 = 4.7409e-04
Loss = 2.3738e-02, PNorm = 166.0968, GNorm = 0.3732, lr_0 = 4.7377e-04
Loss = 2.4214e-02, PNorm = 166.1193, GNorm = 0.2631, lr_0 = 4.7344e-04
Loss = 1.7542e-02, PNorm = 166.1377, GNorm = 0.2421, lr_0 = 4.7312e-04
Loss = 2.6943e-02, PNorm = 166.1587, GNorm = 0.2969, lr_0 = 4.7279e-04
Loss = 2.4228e-02, PNorm = 166.1859, GNorm = 0.5098, lr_0 = 4.7247e-04
Loss = 2.6934e-02, PNorm = 166.2103, GNorm = 0.2839, lr_0 = 4.7215e-04
Loss = 2.6408e-02, PNorm = 166.2381, GNorm = 0.2973, lr_0 = 4.7182e-04
Loss = 1.9720e-02, PNorm = 166.2639, GNorm = 0.4325, lr_0 = 4.7150e-04
Loss = 2.2220e-02, PNorm = 166.2879, GNorm = 0.1635, lr_0 = 4.7118e-04
Loss = 2.0978e-02, PNorm = 166.3094, GNorm = 0.5033, lr_0 = 4.7085e-04
Loss = 2.0052e-02, PNorm = 166.3335, GNorm = 0.3218, lr_0 = 4.7053e-04
Loss = 2.2127e-02, PNorm = 166.3586, GNorm = 0.4666, lr_0 = 4.7021e-04
Loss = 2.3463e-02, PNorm = 166.3836, GNorm = 0.7933, lr_0 = 4.6989e-04
Loss = 2.7075e-02, PNorm = 166.4071, GNorm = 0.2010, lr_0 = 4.6957e-04
Loss = 2.3443e-02, PNorm = 166.4338, GNorm = 0.2347, lr_0 = 4.6924e-04
Loss = 2.5086e-02, PNorm = 166.4599, GNorm = 0.5622, lr_0 = 4.6892e-04
Loss = 2.5321e-02, PNorm = 166.4868, GNorm = 0.8356, lr_0 = 4.6860e-04
Loss = 2.2845e-02, PNorm = 166.5093, GNorm = 0.3732, lr_0 = 4.6828e-04
Loss = 2.0023e-02, PNorm = 166.5335, GNorm = 0.4005, lr_0 = 4.6796e-04
Loss = 2.3736e-02, PNorm = 166.5614, GNorm = 0.5422, lr_0 = 4.6764e-04
Loss = 2.9605e-02, PNorm = 166.5876, GNorm = 0.2582, lr_0 = 4.6732e-04
Loss = 2.2338e-02, PNorm = 166.6189, GNorm = 0.4941, lr_0 = 4.6700e-04
Loss = 2.3074e-02, PNorm = 166.6503, GNorm = 0.2930, lr_0 = 4.6668e-04
Loss = 1.8437e-02, PNorm = 166.6802, GNorm = 0.3619, lr_0 = 4.6636e-04
Loss = 2.2374e-02, PNorm = 166.7046, GNorm = 0.4715, lr_0 = 4.6604e-04
Loss = 2.3374e-02, PNorm = 166.7297, GNorm = 0.2547, lr_0 = 4.6572e-04
Loss = 2.4353e-02, PNorm = 166.7532, GNorm = 0.1993, lr_0 = 4.6540e-04
Loss = 2.4831e-02, PNorm = 166.7819, GNorm = 0.4618, lr_0 = 4.6508e-04
Loss = 2.1515e-02, PNorm = 166.8054, GNorm = 0.3103, lr_0 = 4.6476e-04
Loss = 2.5319e-02, PNorm = 166.8335, GNorm = 0.2338, lr_0 = 4.6445e-04
Loss = 2.1763e-02, PNorm = 166.8579, GNorm = 0.2542, lr_0 = 4.6413e-04
Loss = 2.2223e-02, PNorm = 166.8809, GNorm = 0.1884, lr_0 = 4.6381e-04
Loss = 1.8829e-02, PNorm = 166.9106, GNorm = 0.3737, lr_0 = 4.6349e-04
Loss = 2.3731e-02, PNorm = 166.9347, GNorm = 0.6722, lr_0 = 4.6317e-04
Loss = 2.5428e-02, PNorm = 166.9579, GNorm = 1.8239, lr_0 = 4.6286e-04
Loss = 2.6039e-02, PNorm = 166.9798, GNorm = 0.2673, lr_0 = 4.6254e-04
Loss = 2.8072e-02, PNorm = 167.0045, GNorm = 0.2778, lr_0 = 4.6222e-04
Loss = 2.8817e-02, PNorm = 167.0340, GNorm = 0.4709, lr_0 = 4.6191e-04
Loss = 2.6429e-02, PNorm = 167.0645, GNorm = 0.4583, lr_0 = 4.6159e-04
Loss = 2.4741e-02, PNorm = 167.0943, GNorm = 0.8711, lr_0 = 4.6127e-04
Loss = 1.7936e-02, PNorm = 167.1216, GNorm = 0.3365, lr_0 = 4.6096e-04
Loss = 2.6799e-02, PNorm = 167.1452, GNorm = 0.4834, lr_0 = 4.6064e-04
Loss = 1.7963e-02, PNorm = 167.1775, GNorm = 0.2571, lr_0 = 4.6033e-04
Loss = 1.8039e-02, PNorm = 167.2058, GNorm = 0.2910, lr_0 = 4.6001e-04
Loss = 2.2512e-02, PNorm = 167.2366, GNorm = 0.3200, lr_0 = 4.5970e-04
Loss = 1.8087e-02, PNorm = 167.2576, GNorm = 0.3540, lr_0 = 4.5938e-04
Loss = 2.0502e-02, PNorm = 167.2832, GNorm = 0.2942, lr_0 = 4.5907e-04
Loss = 1.8318e-02, PNorm = 167.3075, GNorm = 0.1881, lr_0 = 4.5875e-04
Loss = 2.1773e-02, PNorm = 167.3312, GNorm = 0.4552, lr_0 = 4.5844e-04
Loss = 2.1175e-02, PNorm = 167.3575, GNorm = 0.2263, lr_0 = 4.5812e-04
Loss = 2.4557e-02, PNorm = 167.3828, GNorm = 0.7891, lr_0 = 4.5781e-04
Loss = 1.9887e-02, PNorm = 167.4130, GNorm = 0.3614, lr_0 = 4.5750e-04
Loss = 2.2427e-02, PNorm = 167.4382, GNorm = 0.5070, lr_0 = 4.5718e-04
Loss = 2.1589e-02, PNorm = 167.4664, GNorm = 0.2068, lr_0 = 4.5687e-04
Loss = 2.1995e-02, PNorm = 167.4972, GNorm = 0.2670, lr_0 = 4.5656e-04
Loss = 1.8345e-02, PNorm = 167.5265, GNorm = 0.2021, lr_0 = 4.5624e-04
Loss = 1.8993e-02, PNorm = 167.5570, GNorm = 0.3347, lr_0 = 4.5593e-04
Loss = 2.0030e-02, PNorm = 167.5818, GNorm = 0.3713, lr_0 = 4.5562e-04
Loss = 2.3828e-02, PNorm = 167.6045, GNorm = 0.2655, lr_0 = 4.5531e-04
Loss = 1.6770e-02, PNorm = 167.6306, GNorm = 0.1674, lr_0 = 4.5499e-04
Loss = 2.1725e-02, PNorm = 167.6540, GNorm = 0.4473, lr_0 = 4.5468e-04
Loss = 1.9122e-02, PNorm = 167.6835, GNorm = 0.2126, lr_0 = 4.5437e-04
Loss = 2.3058e-02, PNorm = 167.7142, GNorm = 0.2105, lr_0 = 4.5406e-04
Loss = 2.4503e-02, PNorm = 167.7458, GNorm = 0.6515, lr_0 = 4.5375e-04
Loss = 2.3250e-02, PNorm = 167.7727, GNorm = 0.3853, lr_0 = 4.5344e-04
Loss = 1.9884e-02, PNorm = 167.8023, GNorm = 0.4486, lr_0 = 4.5313e-04
Loss = 3.1822e-02, PNorm = 167.8335, GNorm = 0.4194, lr_0 = 4.5282e-04
Loss = 2.3656e-02, PNorm = 167.8648, GNorm = 0.3195, lr_0 = 4.5251e-04
Loss = 2.0262e-02, PNorm = 167.8929, GNorm = 0.5178, lr_0 = 4.5220e-04
Loss = 2.8548e-02, PNorm = 167.9142, GNorm = 0.2684, lr_0 = 4.5189e-04
Loss = 2.3326e-02, PNorm = 167.9415, GNorm = 0.5847, lr_0 = 4.5158e-04
Loss = 2.9343e-02, PNorm = 167.9720, GNorm = 0.3453, lr_0 = 4.5127e-04
Loss = 2.2046e-02, PNorm = 168.0045, GNorm = 0.2540, lr_0 = 4.5096e-04
Loss = 2.1258e-02, PNorm = 168.0347, GNorm = 0.3049, lr_0 = 4.5065e-04
Loss = 2.3591e-02, PNorm = 168.0645, GNorm = 0.2236, lr_0 = 4.5034e-04
Loss = 2.1190e-02, PNorm = 168.0922, GNorm = 0.2079, lr_0 = 4.5003e-04
Loss = 2.4311e-02, PNorm = 168.1185, GNorm = 0.5981, lr_0 = 4.4972e-04
Loss = 1.8871e-02, PNorm = 168.1474, GNorm = 0.3411, lr_0 = 4.4942e-04
Loss = 2.0978e-02, PNorm = 168.1740, GNorm = 0.4753, lr_0 = 4.4911e-04
Loss = 3.0416e-02, PNorm = 168.1996, GNorm = 0.4170, lr_0 = 4.4880e-04
Loss = 2.2230e-02, PNorm = 168.2284, GNorm = 0.2671, lr_0 = 4.4849e-04
Loss = 1.9999e-02, PNorm = 168.2561, GNorm = 0.5351, lr_0 = 4.4819e-04
Loss = 1.9194e-02, PNorm = 168.2834, GNorm = 0.4336, lr_0 = 4.4788e-04
Loss = 2.0891e-02, PNorm = 168.3127, GNorm = 0.5275, lr_0 = 4.4757e-04
Loss = 1.8692e-02, PNorm = 168.3407, GNorm = 0.2868, lr_0 = 4.4727e-04
Loss = 1.8035e-02, PNorm = 168.3676, GNorm = 0.2507, lr_0 = 4.4696e-04
Loss = 2.0092e-02, PNorm = 168.3897, GNorm = 0.3167, lr_0 = 4.4665e-04
Loss = 2.1784e-02, PNorm = 168.4151, GNorm = 0.2185, lr_0 = 4.4635e-04
Loss = 2.9767e-02, PNorm = 168.4435, GNorm = 0.5731, lr_0 = 4.4604e-04
Loss = 1.9083e-02, PNorm = 168.4726, GNorm = 0.6047, lr_0 = 4.4574e-04
Loss = 2.3585e-02, PNorm = 168.4992, GNorm = 0.2171, lr_0 = 4.4543e-04
Loss = 2.9814e-02, PNorm = 168.5298, GNorm = 0.2871, lr_0 = 4.4513e-04
Loss = 2.3780e-02, PNorm = 168.5586, GNorm = 0.4015, lr_0 = 4.4482e-04
Loss = 2.6823e-02, PNorm = 168.5897, GNorm = 0.9404, lr_0 = 4.4452e-04
Loss = 2.8405e-02, PNorm = 168.6177, GNorm = 0.3185, lr_0 = 4.4421e-04
Loss = 3.3799e-02, PNorm = 168.6455, GNorm = 0.2100, lr_0 = 4.4391e-04
Loss = 1.9584e-02, PNorm = 168.6738, GNorm = 0.4560, lr_0 = 4.4360e-04
Loss = 2.2480e-02, PNorm = 168.6985, GNorm = 0.2153, lr_0 = 4.4330e-04
Loss = 2.3410e-02, PNorm = 168.7245, GNorm = 0.5994, lr_0 = 4.4299e-04
Loss = 2.4520e-02, PNorm = 168.7517, GNorm = 0.6582, lr_0 = 4.4269e-04
Loss = 2.1888e-02, PNorm = 168.7802, GNorm = 0.5869, lr_0 = 4.4239e-04
Loss = 3.5169e-02, PNorm = 168.8106, GNorm = 0.4382, lr_0 = 4.4209e-04
Loss = 2.6437e-02, PNorm = 168.8448, GNorm = 0.2738, lr_0 = 4.4178e-04
Loss = 2.6084e-02, PNorm = 168.8753, GNorm = 0.3786, lr_0 = 4.4148e-04
Loss = 2.6066e-02, PNorm = 168.9055, GNorm = 0.3820, lr_0 = 4.4118e-04
Loss = 2.4786e-02, PNorm = 168.9257, GNorm = 0.3444, lr_0 = 4.4088e-04
Loss = 2.1025e-02, PNorm = 168.9545, GNorm = 0.4688, lr_0 = 4.4057e-04
Loss = 2.1284e-02, PNorm = 168.9802, GNorm = 0.3637, lr_0 = 4.4027e-04
Loss = 2.7949e-02, PNorm = 169.0090, GNorm = 0.3003, lr_0 = 4.3997e-04
Loss = 2.5226e-02, PNorm = 169.0425, GNorm = 0.3359, lr_0 = 4.3967e-04
Loss = 2.8627e-02, PNorm = 169.0757, GNorm = 1.2836, lr_0 = 4.3937e-04
Validation mae = 0.122410
Epoch 12
Loss = 1.9507e-02, PNorm = 169.1010, GNorm = 0.4035, lr_0 = 4.3907e-04
Loss = 2.1016e-02, PNorm = 169.1236, GNorm = 0.2179, lr_0 = 4.3877e-04
Loss = 2.3559e-02, PNorm = 169.1475, GNorm = 0.4004, lr_0 = 4.3846e-04
Loss = 1.9045e-02, PNorm = 169.1726, GNorm = 0.2565, lr_0 = 4.3816e-04
Loss = 2.0829e-02, PNorm = 169.1975, GNorm = 0.2684, lr_0 = 4.3786e-04
Loss = 2.1747e-02, PNorm = 169.2203, GNorm = 0.2674, lr_0 = 4.3756e-04
Loss = 1.8351e-02, PNorm = 169.2418, GNorm = 0.2534, lr_0 = 4.3726e-04
Loss = 2.1492e-02, PNorm = 169.2640, GNorm = 0.2750, lr_0 = 4.3696e-04
Loss = 1.9110e-02, PNorm = 169.2858, GNorm = 0.2645, lr_0 = 4.3667e-04
Loss = 1.6080e-02, PNorm = 169.3070, GNorm = 0.1362, lr_0 = 4.3637e-04
Loss = 2.0244e-02, PNorm = 169.3284, GNorm = 0.6021, lr_0 = 4.3607e-04
Loss = 2.5479e-02, PNorm = 169.3496, GNorm = 0.2791, lr_0 = 4.3577e-04
Loss = 1.8741e-02, PNorm = 169.3701, GNorm = 0.2947, lr_0 = 4.3547e-04
Loss = 1.9849e-02, PNorm = 169.3927, GNorm = 0.3446, lr_0 = 4.3517e-04
Loss = 2.3427e-02, PNorm = 169.4116, GNorm = 0.1640, lr_0 = 4.3487e-04
Loss = 1.6882e-02, PNorm = 169.4295, GNorm = 0.2782, lr_0 = 4.3458e-04
Loss = 2.0862e-02, PNorm = 169.4506, GNorm = 0.2949, lr_0 = 4.3428e-04
Loss = 2.0673e-02, PNorm = 169.4707, GNorm = 0.3791, lr_0 = 4.3398e-04
Loss = 1.8969e-02, PNorm = 169.4934, GNorm = 0.2586, lr_0 = 4.3368e-04
Loss = 2.6204e-02, PNorm = 169.5131, GNorm = 0.8310, lr_0 = 4.3339e-04
Loss = 1.9467e-02, PNorm = 169.5363, GNorm = 0.1757, lr_0 = 4.3309e-04
Loss = 1.9348e-02, PNorm = 169.5587, GNorm = 0.4530, lr_0 = 4.3279e-04
Loss = 1.6344e-02, PNorm = 169.5804, GNorm = 0.3082, lr_0 = 4.3250e-04
Loss = 1.7820e-02, PNorm = 169.6006, GNorm = 0.2045, lr_0 = 4.3220e-04
Loss = 1.7936e-02, PNorm = 169.6261, GNorm = 0.4160, lr_0 = 4.3190e-04
Loss = 1.6841e-02, PNorm = 169.6478, GNorm = 0.3226, lr_0 = 4.3161e-04
Loss = 1.7205e-02, PNorm = 169.6679, GNorm = 0.2031, lr_0 = 4.3131e-04
Loss = 1.6760e-02, PNorm = 169.6878, GNorm = 0.2692, lr_0 = 4.3102e-04
Loss = 1.5478e-02, PNorm = 169.7089, GNorm = 0.2979, lr_0 = 4.3072e-04
Loss = 1.9030e-02, PNorm = 169.7252, GNorm = 0.3630, lr_0 = 4.3043e-04
Loss = 1.8699e-02, PNorm = 169.7463, GNorm = 0.2291, lr_0 = 4.3013e-04
Loss = 1.9526e-02, PNorm = 169.7654, GNorm = 0.2180, lr_0 = 4.2984e-04
Loss = 1.8630e-02, PNorm = 169.7841, GNorm = 0.2636, lr_0 = 4.2954e-04
Loss = 2.1201e-02, PNorm = 169.8046, GNorm = 0.3004, lr_0 = 4.2925e-04
Loss = 1.8037e-02, PNorm = 169.8290, GNorm = 0.1981, lr_0 = 4.2895e-04
Loss = 1.8252e-02, PNorm = 169.8509, GNorm = 0.1770, lr_0 = 4.2866e-04
Loss = 1.7358e-02, PNorm = 169.8710, GNorm = 0.2628, lr_0 = 4.2837e-04
Loss = 1.7737e-02, PNorm = 169.8877, GNorm = 0.2307, lr_0 = 4.2807e-04
Loss = 2.4249e-02, PNorm = 169.9096, GNorm = 0.2899, lr_0 = 4.2778e-04
Loss = 1.8202e-02, PNorm = 169.9292, GNorm = 0.2305, lr_0 = 4.2749e-04
Loss = 1.7500e-02, PNorm = 169.9508, GNorm = 0.2506, lr_0 = 4.2719e-04
Loss = 1.5158e-02, PNorm = 169.9726, GNorm = 0.2202, lr_0 = 4.2690e-04
Loss = 2.0783e-02, PNorm = 169.9904, GNorm = 0.2317, lr_0 = 4.2661e-04
Loss = 2.8054e-02, PNorm = 170.0097, GNorm = 0.3957, lr_0 = 4.2632e-04
Loss = 1.7922e-02, PNorm = 170.0345, GNorm = 0.3338, lr_0 = 4.2602e-04
Loss = 2.1356e-02, PNorm = 170.0592, GNorm = 0.4672, lr_0 = 4.2573e-04
Loss = 2.2119e-02, PNorm = 170.0840, GNorm = 0.2318, lr_0 = 4.2544e-04
Loss = 1.6496e-02, PNorm = 170.1095, GNorm = 0.2007, lr_0 = 4.2515e-04
Loss = 2.4720e-02, PNorm = 170.1325, GNorm = 0.3271, lr_0 = 4.2486e-04
Loss = 1.7906e-02, PNorm = 170.1582, GNorm = 0.1944, lr_0 = 4.2457e-04
Loss = 1.3576e-02, PNorm = 170.1836, GNorm = 0.2660, lr_0 = 4.2428e-04
Loss = 2.1163e-02, PNorm = 170.2101, GNorm = 0.2885, lr_0 = 4.2399e-04
Loss = 1.8422e-02, PNorm = 170.2316, GNorm = 0.1696, lr_0 = 4.2370e-04
Loss = 1.9982e-02, PNorm = 170.2510, GNorm = 0.2602, lr_0 = 4.2340e-04
Loss = 2.2472e-02, PNorm = 170.2783, GNorm = 0.2651, lr_0 = 4.2311e-04
Loss = 1.8539e-02, PNorm = 170.3022, GNorm = 0.5629, lr_0 = 4.2283e-04
Loss = 1.6711e-02, PNorm = 170.3282, GNorm = 0.2941, lr_0 = 4.2254e-04
Loss = 1.8323e-02, PNorm = 170.3496, GNorm = 0.3657, lr_0 = 4.2225e-04
Loss = 1.5640e-02, PNorm = 170.3719, GNorm = 0.2107, lr_0 = 4.2196e-04
Loss = 1.9237e-02, PNorm = 170.3960, GNorm = 0.4098, lr_0 = 4.2167e-04
Loss = 1.3141e-02, PNorm = 170.4188, GNorm = 0.2456, lr_0 = 4.2138e-04
Loss = 2.0431e-02, PNorm = 170.4395, GNorm = 0.3577, lr_0 = 4.2109e-04
Loss = 2.3720e-02, PNorm = 170.4625, GNorm = 0.8486, lr_0 = 4.2080e-04
Loss = 2.0836e-02, PNorm = 170.4836, GNorm = 0.2318, lr_0 = 4.2051e-04
Loss = 1.9336e-02, PNorm = 170.5076, GNorm = 0.3318, lr_0 = 4.2023e-04
Loss = 1.9951e-02, PNorm = 170.5275, GNorm = 0.1883, lr_0 = 4.1994e-04
Loss = 1.9135e-02, PNorm = 170.5520, GNorm = 0.3410, lr_0 = 4.1965e-04
Loss = 2.0134e-02, PNorm = 170.5768, GNorm = 0.2897, lr_0 = 4.1936e-04
Loss = 2.0709e-02, PNorm = 170.6023, GNorm = 0.4757, lr_0 = 4.1907e-04
Loss = 2.4269e-02, PNorm = 170.6283, GNorm = 0.2479, lr_0 = 4.1879e-04
Loss = 2.1525e-02, PNorm = 170.6517, GNorm = 0.5243, lr_0 = 4.1850e-04
Loss = 1.6157e-02, PNorm = 170.6753, GNorm = 0.4120, lr_0 = 4.1821e-04
Loss = 1.6313e-02, PNorm = 170.6997, GNorm = 0.2304, lr_0 = 4.1793e-04
Loss = 2.3480e-02, PNorm = 170.7240, GNorm = 0.4261, lr_0 = 4.1764e-04
Loss = 3.1491e-02, PNorm = 170.7495, GNorm = 0.2683, lr_0 = 4.1736e-04
Loss = 1.6289e-02, PNorm = 170.7718, GNorm = 0.3747, lr_0 = 4.1707e-04
Loss = 1.9147e-02, PNorm = 170.7939, GNorm = 1.0964, lr_0 = 4.1678e-04
Loss = 1.9220e-02, PNorm = 170.8208, GNorm = 0.5552, lr_0 = 4.1650e-04
Loss = 2.2555e-02, PNorm = 170.8447, GNorm = 0.5217, lr_0 = 4.1621e-04
Loss = 1.9586e-02, PNorm = 170.8716, GNorm = 0.3626, lr_0 = 4.1593e-04
Loss = 1.9025e-02, PNorm = 170.8975, GNorm = 0.2608, lr_0 = 4.1564e-04
Loss = 2.4717e-02, PNorm = 170.9215, GNorm = 0.5396, lr_0 = 4.1536e-04
Loss = 1.7289e-02, PNorm = 170.9428, GNorm = 0.4199, lr_0 = 4.1507e-04
Loss = 1.6571e-02, PNorm = 170.9683, GNorm = 0.2566, lr_0 = 4.1479e-04
Loss = 1.5973e-02, PNorm = 170.9912, GNorm = 0.4437, lr_0 = 4.1450e-04
Loss = 2.0638e-02, PNorm = 171.0113, GNorm = 0.3823, lr_0 = 4.1422e-04
Loss = 1.4131e-02, PNorm = 171.0341, GNorm = 0.5062, lr_0 = 4.1394e-04
Loss = 2.3188e-02, PNorm = 171.0600, GNorm = 0.2954, lr_0 = 4.1365e-04
Loss = 2.4511e-02, PNorm = 171.0897, GNorm = 0.5604, lr_0 = 4.1337e-04
Loss = 2.3337e-02, PNorm = 171.1156, GNorm = 0.2442, lr_0 = 4.1309e-04
Loss = 1.6721e-02, PNorm = 171.1383, GNorm = 0.2705, lr_0 = 4.1280e-04
Loss = 1.7564e-02, PNorm = 171.1616, GNorm = 0.3405, lr_0 = 4.1252e-04
Loss = 1.6934e-02, PNorm = 171.1841, GNorm = 0.3169, lr_0 = 4.1224e-04
Loss = 1.6987e-02, PNorm = 171.2094, GNorm = 0.4155, lr_0 = 4.1196e-04
Loss = 1.9662e-02, PNorm = 171.2318, GNorm = 0.6732, lr_0 = 4.1167e-04
Loss = 2.0535e-02, PNorm = 171.2602, GNorm = 0.3329, lr_0 = 4.1139e-04
Loss = 2.1729e-02, PNorm = 171.2889, GNorm = 0.2579, lr_0 = 4.1111e-04
Loss = 1.5393e-02, PNorm = 171.3171, GNorm = 0.3587, lr_0 = 4.1083e-04
Loss = 1.9990e-02, PNorm = 171.3344, GNorm = 0.3487, lr_0 = 4.1055e-04
Loss = 1.7520e-02, PNorm = 171.3560, GNorm = 0.5580, lr_0 = 4.1027e-04
Loss = 1.8507e-02, PNorm = 171.3776, GNorm = 0.3718, lr_0 = 4.0998e-04
Loss = 2.8353e-02, PNorm = 171.4036, GNorm = 0.2807, lr_0 = 4.0970e-04
Loss = 2.3218e-02, PNorm = 171.4291, GNorm = 0.4629, lr_0 = 4.0942e-04
Loss = 1.5518e-02, PNorm = 171.4514, GNorm = 0.3190, lr_0 = 4.0914e-04
Loss = 1.7759e-02, PNorm = 171.4767, GNorm = 0.3192, lr_0 = 4.0886e-04
Loss = 1.5645e-02, PNorm = 171.4978, GNorm = 0.3575, lr_0 = 4.0858e-04
Loss = 1.7094e-02, PNorm = 171.5187, GNorm = 0.4038, lr_0 = 4.0830e-04
Loss = 2.8473e-02, PNorm = 171.5404, GNorm = 0.2056, lr_0 = 4.0802e-04
Loss = 2.1294e-02, PNorm = 171.5651, GNorm = 0.2508, lr_0 = 4.0774e-04
Loss = 2.4984e-02, PNorm = 171.5907, GNorm = 0.7869, lr_0 = 4.0746e-04
Loss = 1.4733e-02, PNorm = 171.6143, GNorm = 0.2833, lr_0 = 4.0718e-04
Loss = 2.0329e-02, PNorm = 171.6369, GNorm = 0.1870, lr_0 = 4.0691e-04
Loss = 1.9698e-02, PNorm = 171.6597, GNorm = 0.2585, lr_0 = 4.0663e-04
Loss = 4.1862e-02, PNorm = 171.6870, GNorm = 0.1555, lr_0 = 4.0635e-04
Loss = 1.7299e-02, PNorm = 171.7109, GNorm = 0.4299, lr_0 = 4.0607e-04
Loss = 2.1219e-02, PNorm = 171.7377, GNorm = 0.6083, lr_0 = 4.0579e-04
Loss = 1.8449e-02, PNorm = 171.7603, GNorm = 0.3433, lr_0 = 4.0551e-04
Loss = 2.2497e-02, PNorm = 171.7825, GNorm = 0.3999, lr_0 = 4.0524e-04
Loss = 1.8817e-02, PNorm = 171.8100, GNorm = 0.2207, lr_0 = 4.0496e-04
Loss = 2.3808e-02, PNorm = 171.8395, GNorm = 0.2623, lr_0 = 4.0468e-04
Validation mae = 0.122327
Epoch 13
Loss = 1.7021e-02, PNorm = 171.8626, GNorm = 0.5543, lr_0 = 4.0440e-04
Loss = 1.9065e-02, PNorm = 171.8780, GNorm = 0.2442, lr_0 = 4.0413e-04
Loss = 1.9069e-02, PNorm = 171.8932, GNorm = 0.2503, lr_0 = 4.0385e-04
Loss = 1.9512e-02, PNorm = 171.9093, GNorm = 0.2868, lr_0 = 4.0357e-04
Loss = 1.5886e-02, PNorm = 171.9242, GNorm = 0.2781, lr_0 = 4.0330e-04
Loss = 1.5690e-02, PNorm = 171.9391, GNorm = 0.5198, lr_0 = 4.0302e-04
Loss = 1.9506e-02, PNorm = 171.9556, GNorm = 0.5498, lr_0 = 4.0274e-04
Loss = 1.6421e-02, PNorm = 171.9748, GNorm = 0.3416, lr_0 = 4.0247e-04
Loss = 1.5988e-02, PNorm = 171.9974, GNorm = 0.3345, lr_0 = 4.0219e-04
Loss = 1.8041e-02, PNorm = 172.0144, GNorm = 0.3694, lr_0 = 4.0192e-04
Loss = 1.9857e-02, PNorm = 172.0304, GNorm = 0.2837, lr_0 = 4.0164e-04
Loss = 1.8325e-02, PNorm = 172.0459, GNorm = 0.2311, lr_0 = 4.0137e-04
Loss = 1.5791e-02, PNorm = 172.0607, GNorm = 0.4345, lr_0 = 4.0109e-04
Loss = 2.2231e-02, PNorm = 172.0765, GNorm = 0.7534, lr_0 = 4.0082e-04
Loss = 2.1044e-02, PNorm = 172.0951, GNorm = 0.3854, lr_0 = 4.0054e-04
Loss = 1.7155e-02, PNorm = 172.1090, GNorm = 0.1341, lr_0 = 4.0027e-04
Loss = 1.5451e-02, PNorm = 172.1272, GNorm = 0.2375, lr_0 = 3.9999e-04
Loss = 1.6304e-02, PNorm = 172.1469, GNorm = 0.3058, lr_0 = 3.9972e-04
Loss = 1.5256e-02, PNorm = 172.1626, GNorm = 0.2037, lr_0 = 3.9945e-04
Loss = 1.6292e-02, PNorm = 172.1798, GNorm = 0.3833, lr_0 = 3.9917e-04
Loss = 2.0473e-02, PNorm = 172.2000, GNorm = 1.1354, lr_0 = 3.9890e-04
Loss = 1.9735e-02, PNorm = 172.2190, GNorm = 0.2762, lr_0 = 3.9863e-04
Loss = 2.1259e-02, PNorm = 172.2397, GNorm = 0.8092, lr_0 = 3.9835e-04
Loss = 1.4692e-02, PNorm = 172.2537, GNorm = 0.2053, lr_0 = 3.9808e-04
Loss = 2.0520e-02, PNorm = 172.2686, GNorm = 0.2748, lr_0 = 3.9781e-04
Loss = 1.4203e-02, PNorm = 172.2822, GNorm = 0.4697, lr_0 = 3.9753e-04
Loss = 1.8475e-02, PNorm = 172.2958, GNorm = 0.1527, lr_0 = 3.9726e-04
Loss = 1.4670e-02, PNorm = 172.3127, GNorm = 0.2352, lr_0 = 3.9699e-04
Loss = 1.4377e-02, PNorm = 172.3277, GNorm = 0.4038, lr_0 = 3.9672e-04
Loss = 1.8177e-02, PNorm = 172.3446, GNorm = 0.2239, lr_0 = 3.9645e-04
Loss = 1.6023e-02, PNorm = 172.3667, GNorm = 0.1888, lr_0 = 3.9617e-04
Loss = 1.3917e-02, PNorm = 172.3856, GNorm = 0.4290, lr_0 = 3.9590e-04
Loss = 1.6956e-02, PNorm = 172.4048, GNorm = 0.4492, lr_0 = 3.9563e-04
Loss = 1.6859e-02, PNorm = 172.4236, GNorm = 0.5553, lr_0 = 3.9536e-04
Loss = 1.4355e-02, PNorm = 172.4394, GNorm = 0.3065, lr_0 = 3.9509e-04
Loss = 1.7026e-02, PNorm = 172.4569, GNorm = 0.4100, lr_0 = 3.9482e-04
Loss = 1.4812e-02, PNorm = 172.4758, GNorm = 0.2237, lr_0 = 3.9455e-04
Loss = 1.6447e-02, PNorm = 172.4924, GNorm = 0.1897, lr_0 = 3.9428e-04
Loss = 1.5541e-02, PNorm = 172.5099, GNorm = 0.2745, lr_0 = 3.9401e-04
Loss = 1.4035e-02, PNorm = 172.5268, GNorm = 0.1786, lr_0 = 3.9374e-04
Loss = 1.6583e-02, PNorm = 172.5470, GNorm = 0.3177, lr_0 = 3.9347e-04
Loss = 1.3476e-02, PNorm = 172.5658, GNorm = 0.2014, lr_0 = 3.9320e-04
Loss = 1.3296e-02, PNorm = 172.5809, GNorm = 0.1477, lr_0 = 3.9293e-04
Loss = 2.0650e-02, PNorm = 172.5971, GNorm = 0.2673, lr_0 = 3.9266e-04
Loss = 1.5481e-02, PNorm = 172.6147, GNorm = 0.2118, lr_0 = 3.9239e-04
Loss = 1.9496e-02, PNorm = 172.6311, GNorm = 0.1523, lr_0 = 3.9212e-04
Loss = 1.3737e-02, PNorm = 172.6465, GNorm = 0.2730, lr_0 = 3.9185e-04
Loss = 1.7831e-02, PNorm = 172.6652, GNorm = 0.1440, lr_0 = 3.9159e-04
Loss = 1.6047e-02, PNorm = 172.6839, GNorm = 0.5440, lr_0 = 3.9132e-04
Loss = 1.6322e-02, PNorm = 172.7024, GNorm = 0.1876, lr_0 = 3.9105e-04
Loss = 1.4404e-02, PNorm = 172.7220, GNorm = 0.4319, lr_0 = 3.9078e-04
Loss = 1.7795e-02, PNorm = 172.7404, GNorm = 0.2611, lr_0 = 3.9051e-04
Loss = 1.6688e-02, PNorm = 172.7609, GNorm = 0.3781, lr_0 = 3.9025e-04
Loss = 1.5382e-02, PNorm = 172.7755, GNorm = 0.8262, lr_0 = 3.8998e-04
Loss = 1.8487e-02, PNorm = 172.7896, GNorm = 0.3720, lr_0 = 3.8971e-04
Loss = 1.2674e-02, PNorm = 172.8086, GNorm = 0.1994, lr_0 = 3.8945e-04
Loss = 1.3856e-02, PNorm = 172.8247, GNorm = 0.1947, lr_0 = 3.8918e-04
Loss = 1.7142e-02, PNorm = 172.8433, GNorm = 0.1816, lr_0 = 3.8891e-04
Loss = 2.2310e-02, PNorm = 172.8626, GNorm = 0.3077, lr_0 = 3.8865e-04
Loss = 1.3804e-02, PNorm = 172.8814, GNorm = 0.3180, lr_0 = 3.8838e-04
Loss = 1.4637e-02, PNorm = 172.8956, GNorm = 0.2955, lr_0 = 3.8811e-04
Loss = 1.3625e-02, PNorm = 172.9104, GNorm = 0.2754, lr_0 = 3.8785e-04
Loss = 1.7086e-02, PNorm = 172.9254, GNorm = 0.4307, lr_0 = 3.8758e-04
Loss = 1.2991e-02, PNorm = 172.9433, GNorm = 0.2460, lr_0 = 3.8732e-04
Loss = 1.4513e-02, PNorm = 172.9612, GNorm = 0.1768, lr_0 = 3.8705e-04
Loss = 1.3093e-02, PNorm = 172.9778, GNorm = 0.3375, lr_0 = 3.8679e-04
Loss = 1.9160e-02, PNorm = 172.9903, GNorm = 0.5436, lr_0 = 3.8652e-04
Loss = 1.5018e-02, PNorm = 173.0082, GNorm = 0.2680, lr_0 = 3.8626e-04
Loss = 1.5197e-02, PNorm = 173.0296, GNorm = 0.5523, lr_0 = 3.8599e-04
Loss = 1.3823e-02, PNorm = 173.0485, GNorm = 0.2380, lr_0 = 3.8573e-04
Loss = 2.0210e-02, PNorm = 173.0699, GNorm = 0.2601, lr_0 = 3.8546e-04
Loss = 1.6658e-02, PNorm = 173.0893, GNorm = 0.4585, lr_0 = 3.8520e-04
Loss = 1.3464e-02, PNorm = 173.1098, GNorm = 0.1555, lr_0 = 3.8493e-04
Loss = 1.8494e-02, PNorm = 173.1321, GNorm = 0.3222, lr_0 = 3.8467e-04
Loss = 1.7340e-02, PNorm = 173.1538, GNorm = 0.1939, lr_0 = 3.8441e-04
Loss = 1.2405e-02, PNorm = 173.1754, GNorm = 0.2972, lr_0 = 3.8414e-04
Loss = 3.0685e-02, PNorm = 173.1963, GNorm = 0.2889, lr_0 = 3.8388e-04
Loss = 2.0140e-02, PNorm = 173.2212, GNorm = 0.7309, lr_0 = 3.8362e-04
Loss = 1.4179e-02, PNorm = 173.2406, GNorm = 0.1986, lr_0 = 3.8336e-04
Loss = 1.9978e-02, PNorm = 173.2566, GNorm = 0.9407, lr_0 = 3.8309e-04
Loss = 1.7820e-02, PNorm = 173.2806, GNorm = 0.5451, lr_0 = 3.8283e-04
Loss = 1.5686e-02, PNorm = 173.2993, GNorm = 0.2397, lr_0 = 3.8257e-04
Loss = 2.1076e-02, PNorm = 173.3233, GNorm = 0.3674, lr_0 = 3.8231e-04
Loss = 1.5584e-02, PNorm = 173.3414, GNorm = 0.3990, lr_0 = 3.8204e-04
Loss = 1.4189e-02, PNorm = 173.3592, GNorm = 0.4372, lr_0 = 3.8178e-04
Loss = 1.5821e-02, PNorm = 173.3787, GNorm = 0.2097, lr_0 = 3.8152e-04
Loss = 1.5990e-02, PNorm = 173.3969, GNorm = 0.1646, lr_0 = 3.8126e-04
Loss = 1.6392e-02, PNorm = 173.4196, GNorm = 0.1992, lr_0 = 3.8100e-04
Loss = 2.2396e-02, PNorm = 173.4391, GNorm = 0.3294, lr_0 = 3.8074e-04
Loss = 1.7314e-02, PNorm = 173.4621, GNorm = 0.2407, lr_0 = 3.8048e-04
Loss = 1.5146e-02, PNorm = 173.4828, GNorm = 0.3540, lr_0 = 3.8022e-04
Loss = 1.5074e-02, PNorm = 173.5032, GNorm = 0.3349, lr_0 = 3.7995e-04
Loss = 1.4312e-02, PNorm = 173.5231, GNorm = 0.4242, lr_0 = 3.7969e-04
Loss = 1.6259e-02, PNorm = 173.5427, GNorm = 0.2496, lr_0 = 3.7943e-04
Loss = 1.6085e-02, PNorm = 173.5647, GNorm = 0.2955, lr_0 = 3.7917e-04
Loss = 1.3120e-02, PNorm = 173.5847, GNorm = 0.3949, lr_0 = 3.7891e-04
Loss = 1.9861e-02, PNorm = 173.6007, GNorm = 0.3559, lr_0 = 3.7866e-04
Loss = 1.6916e-02, PNorm = 173.6238, GNorm = 0.2513, lr_0 = 3.7840e-04
Loss = 2.5934e-02, PNorm = 173.6464, GNorm = 0.2828, lr_0 = 3.7814e-04
Loss = 2.0917e-02, PNorm = 173.6668, GNorm = 0.2373, lr_0 = 3.7788e-04
Loss = 1.6824e-02, PNorm = 173.6865, GNorm = 0.2405, lr_0 = 3.7762e-04
Loss = 1.7384e-02, PNorm = 173.7057, GNorm = 0.2748, lr_0 = 3.7736e-04
Loss = 2.6494e-02, PNorm = 173.7300, GNorm = 0.3894, lr_0 = 3.7710e-04
Loss = 2.2269e-02, PNorm = 173.7557, GNorm = 0.5201, lr_0 = 3.7684e-04
Loss = 1.8701e-02, PNorm = 173.7803, GNorm = 0.3434, lr_0 = 3.7659e-04
Loss = 1.3229e-02, PNorm = 173.8044, GNorm = 0.1696, lr_0 = 3.7633e-04
Loss = 1.4854e-02, PNorm = 173.8233, GNorm = 0.1691, lr_0 = 3.7607e-04
Loss = 1.5859e-02, PNorm = 173.8420, GNorm = 0.3515, lr_0 = 3.7581e-04
Loss = 1.5665e-02, PNorm = 173.8637, GNorm = 0.2717, lr_0 = 3.7555e-04
Loss = 2.0074e-02, PNorm = 173.8857, GNorm = 1.6464, lr_0 = 3.7530e-04
Loss = 1.3497e-02, PNorm = 173.9065, GNorm = 0.2957, lr_0 = 3.7504e-04
Loss = 1.5458e-02, PNorm = 173.9227, GNorm = 0.3659, lr_0 = 3.7478e-04
Loss = 1.6528e-02, PNorm = 173.9404, GNorm = 0.1844, lr_0 = 3.7453e-04
Loss = 1.5097e-02, PNorm = 173.9578, GNorm = 0.3194, lr_0 = 3.7427e-04
Loss = 1.2914e-02, PNorm = 173.9767, GNorm = 0.1623, lr_0 = 3.7401e-04
Loss = 1.5119e-02, PNorm = 173.9984, GNorm = 0.2840, lr_0 = 3.7376e-04
Loss = 1.4678e-02, PNorm = 174.0172, GNorm = 0.2376, lr_0 = 3.7350e-04
Loss = 1.7165e-02, PNorm = 174.0363, GNorm = 0.5489, lr_0 = 3.7325e-04
Loss = 1.5836e-02, PNorm = 174.0567, GNorm = 0.4107, lr_0 = 3.7299e-04
Loss = 1.7575e-02, PNorm = 174.0775, GNorm = 0.3536, lr_0 = 3.7273e-04
Validation mae = 0.122119
Epoch 14
Loss = 1.5597e-02, PNorm = 174.0921, GNorm = 0.1551, lr_0 = 3.7248e-04
Loss = 1.2290e-02, PNorm = 174.1054, GNorm = 0.6925, lr_0 = 3.7222e-04
Loss = 1.4515e-02, PNorm = 174.1207, GNorm = 0.1181, lr_0 = 3.7197e-04
Loss = 1.5229e-02, PNorm = 174.1334, GNorm = 0.1450, lr_0 = 3.7171e-04
Loss = 1.9792e-02, PNorm = 174.1479, GNorm = 0.3488, lr_0 = 3.7146e-04
Loss = 1.6905e-02, PNorm = 174.1665, GNorm = 0.3613, lr_0 = 3.7120e-04
Loss = 1.2549e-02, PNorm = 174.1847, GNorm = 0.1843, lr_0 = 3.7095e-04
Loss = 1.7372e-02, PNorm = 174.1985, GNorm = 0.3089, lr_0 = 3.7070e-04
Loss = 1.4183e-02, PNorm = 174.2127, GNorm = 0.2148, lr_0 = 3.7044e-04
Loss = 1.5416e-02, PNorm = 174.2250, GNorm = 0.5466, lr_0 = 3.7019e-04
Loss = 1.5041e-02, PNorm = 174.2359, GNorm = 0.4336, lr_0 = 3.6993e-04
Loss = 1.5210e-02, PNorm = 174.2530, GNorm = 0.5729, lr_0 = 3.6968e-04
Loss = 1.7473e-02, PNorm = 174.2683, GNorm = 0.3291, lr_0 = 3.6943e-04
Loss = 1.7512e-02, PNorm = 174.2848, GNorm = 0.1978, lr_0 = 3.6917e-04
Loss = 1.2519e-02, PNorm = 174.2985, GNorm = 0.2965, lr_0 = 3.6892e-04
Loss = 1.2921e-02, PNorm = 174.3107, GNorm = 0.2721, lr_0 = 3.6867e-04
Loss = 1.3698e-02, PNorm = 174.3230, GNorm = 0.3391, lr_0 = 3.6842e-04
Loss = 1.3059e-02, PNorm = 174.3350, GNorm = 0.1866, lr_0 = 3.6816e-04
Loss = 1.1667e-02, PNorm = 174.3514, GNorm = 0.5346, lr_0 = 3.6791e-04
Loss = 1.3985e-02, PNorm = 174.3666, GNorm = 0.6107, lr_0 = 3.6766e-04
Loss = 1.0392e-02, PNorm = 174.3778, GNorm = 0.2018, lr_0 = 3.6741e-04
Loss = 1.0319e-02, PNorm = 174.3876, GNorm = 0.3550, lr_0 = 3.6716e-04
Loss = 1.3832e-02, PNorm = 174.4029, GNorm = 0.3869, lr_0 = 3.6690e-04
Loss = 1.2766e-02, PNorm = 174.4159, GNorm = 0.1302, lr_0 = 3.6665e-04
Loss = 1.4336e-02, PNorm = 174.4300, GNorm = 0.7149, lr_0 = 3.6640e-04
Loss = 1.4631e-02, PNorm = 174.4477, GNorm = 0.5843, lr_0 = 3.6615e-04
Loss = 1.4913e-02, PNorm = 174.4602, GNorm = 0.1948, lr_0 = 3.6590e-04
Loss = 1.2107e-02, PNorm = 174.4765, GNorm = 0.1985, lr_0 = 3.6565e-04
Loss = 1.2746e-02, PNorm = 174.4890, GNorm = 0.1453, lr_0 = 3.6540e-04
Loss = 1.7783e-02, PNorm = 174.5006, GNorm = 0.4582, lr_0 = 3.6515e-04
Loss = 1.1505e-02, PNorm = 174.5164, GNorm = 0.2416, lr_0 = 3.6490e-04
Loss = 1.7656e-02, PNorm = 174.5325, GNorm = 0.2999, lr_0 = 3.6465e-04
Loss = 1.4967e-02, PNorm = 174.5440, GNorm = 0.4400, lr_0 = 3.6440e-04
Loss = 1.3142e-02, PNorm = 174.5588, GNorm = 0.3243, lr_0 = 3.6415e-04
Loss = 2.3007e-02, PNorm = 174.5752, GNorm = 0.2691, lr_0 = 3.6390e-04
Loss = 1.3479e-02, PNorm = 174.5884, GNorm = 0.2005, lr_0 = 3.6365e-04
Loss = 1.3668e-02, PNorm = 174.6049, GNorm = 0.2739, lr_0 = 3.6340e-04
Loss = 1.1775e-02, PNorm = 174.6204, GNorm = 0.8325, lr_0 = 3.6315e-04
Loss = 1.3495e-02, PNorm = 174.6364, GNorm = 0.1957, lr_0 = 3.6290e-04
Loss = 1.3577e-02, PNorm = 174.6485, GNorm = 0.2350, lr_0 = 3.6266e-04
Loss = 1.1898e-02, PNorm = 174.6606, GNorm = 0.1740, lr_0 = 3.6241e-04
Loss = 1.1123e-02, PNorm = 174.6730, GNorm = 0.2639, lr_0 = 3.6216e-04
Loss = 1.2192e-02, PNorm = 174.6848, GNorm = 0.2096, lr_0 = 3.6191e-04
Loss = 1.3832e-02, PNorm = 174.7034, GNorm = 0.2582, lr_0 = 3.6166e-04
Loss = 1.6926e-02, PNorm = 174.7218, GNorm = 0.3197, lr_0 = 3.6141e-04
Loss = 1.2982e-02, PNorm = 174.7379, GNorm = 0.3804, lr_0 = 3.6117e-04
Loss = 1.5960e-02, PNorm = 174.7550, GNorm = 0.2225, lr_0 = 3.6092e-04
Loss = 1.6278e-02, PNorm = 174.7715, GNorm = 0.2231, lr_0 = 3.6067e-04
Loss = 1.2999e-02, PNorm = 174.7870, GNorm = 0.3172, lr_0 = 3.6043e-04
Loss = 1.1335e-02, PNorm = 174.8048, GNorm = 0.1815, lr_0 = 3.6018e-04
Loss = 1.0772e-02, PNorm = 174.8210, GNorm = 0.1802, lr_0 = 3.5993e-04
Loss = 1.1122e-02, PNorm = 174.8379, GNorm = 0.2416, lr_0 = 3.5969e-04
Loss = 1.1124e-02, PNorm = 174.8531, GNorm = 0.1299, lr_0 = 3.5944e-04
Loss = 1.3430e-02, PNorm = 174.8664, GNorm = 0.2817, lr_0 = 3.5919e-04
Loss = 1.3007e-02, PNorm = 174.8790, GNorm = 0.6157, lr_0 = 3.5895e-04
Loss = 1.1669e-02, PNorm = 174.8939, GNorm = 0.1567, lr_0 = 3.5870e-04
Loss = 1.2703e-02, PNorm = 174.9042, GNorm = 0.1220, lr_0 = 3.5845e-04
Loss = 1.6147e-02, PNorm = 174.9207, GNorm = 0.5076, lr_0 = 3.5821e-04
Loss = 1.1435e-02, PNorm = 174.9357, GNorm = 0.2953, lr_0 = 3.5796e-04
Loss = 1.0265e-02, PNorm = 174.9531, GNorm = 0.2042, lr_0 = 3.5772e-04
Loss = 1.4590e-02, PNorm = 174.9667, GNorm = 0.1662, lr_0 = 3.5747e-04
Loss = 1.3441e-02, PNorm = 174.9840, GNorm = 0.1440, lr_0 = 3.5723e-04
Loss = 1.2356e-02, PNorm = 174.9988, GNorm = 0.1702, lr_0 = 3.5698e-04
Loss = 1.1697e-02, PNorm = 175.0153, GNorm = 0.1313, lr_0 = 3.5674e-04
Loss = 1.1923e-02, PNorm = 175.0319, GNorm = 0.3380, lr_0 = 3.5650e-04
Loss = 2.1676e-02, PNorm = 175.0498, GNorm = 0.3366, lr_0 = 3.5625e-04
Loss = 1.1681e-02, PNorm = 175.0668, GNorm = 0.3026, lr_0 = 3.5601e-04
Loss = 1.4193e-02, PNorm = 175.0829, GNorm = 0.3320, lr_0 = 3.5576e-04
Loss = 1.3789e-02, PNorm = 175.1036, GNorm = 0.5643, lr_0 = 3.5552e-04
Loss = 2.0412e-02, PNorm = 175.1206, GNorm = 0.2220, lr_0 = 3.5528e-04
Loss = 2.1638e-02, PNorm = 175.1375, GNorm = 1.2501, lr_0 = 3.5503e-04
Loss = 1.4215e-02, PNorm = 175.1512, GNorm = 0.3003, lr_0 = 3.5479e-04
Loss = 1.2063e-02, PNorm = 175.1674, GNorm = 0.4001, lr_0 = 3.5455e-04
Loss = 1.2806e-02, PNorm = 175.1830, GNorm = 0.1647, lr_0 = 3.5430e-04
Loss = 1.1608e-02, PNorm = 175.1990, GNorm = 0.2113, lr_0 = 3.5406e-04
Loss = 1.4331e-02, PNorm = 175.2145, GNorm = 0.2138, lr_0 = 3.5382e-04
Loss = 1.3837e-02, PNorm = 175.2306, GNorm = 0.1800, lr_0 = 3.5358e-04
Loss = 1.4412e-02, PNorm = 175.2468, GNorm = 0.2507, lr_0 = 3.5333e-04
Loss = 1.3514e-02, PNorm = 175.2612, GNorm = 0.1540, lr_0 = 3.5309e-04
Loss = 1.3488e-02, PNorm = 175.2789, GNorm = 0.3525, lr_0 = 3.5285e-04
Loss = 1.1887e-02, PNorm = 175.2959, GNorm = 0.1661, lr_0 = 3.5261e-04
Loss = 1.1838e-02, PNorm = 175.3125, GNorm = 0.3428, lr_0 = 3.5237e-04
Loss = 2.7854e-02, PNorm = 175.3331, GNorm = 0.4022, lr_0 = 3.5212e-04
Loss = 1.1219e-02, PNorm = 175.3533, GNorm = 0.2235, lr_0 = 3.5188e-04
Loss = 1.2321e-02, PNorm = 175.3687, GNorm = 0.3664, lr_0 = 3.5164e-04
Loss = 1.6820e-02, PNorm = 175.3877, GNorm = 0.1800, lr_0 = 3.5140e-04
Loss = 1.2405e-02, PNorm = 175.4025, GNorm = 0.1366, lr_0 = 3.5116e-04
Loss = 1.3293e-02, PNorm = 175.4169, GNorm = 0.1799, lr_0 = 3.5092e-04
Loss = 1.0754e-02, PNorm = 175.4326, GNorm = 0.1527, lr_0 = 3.5068e-04
Loss = 1.6174e-02, PNorm = 175.4495, GNorm = 0.3233, lr_0 = 3.5044e-04
Loss = 1.8802e-02, PNorm = 175.4670, GNorm = 0.2945, lr_0 = 3.5020e-04
Loss = 1.9283e-02, PNorm = 175.4850, GNorm = 0.3294, lr_0 = 3.4996e-04
Loss = 2.0216e-02, PNorm = 175.5024, GNorm = 0.2046, lr_0 = 3.4972e-04
Loss = 1.2335e-02, PNorm = 175.5175, GNorm = 0.3922, lr_0 = 3.4948e-04
Loss = 1.2312e-02, PNorm = 175.5336, GNorm = 0.1262, lr_0 = 3.4924e-04
Loss = 2.3888e-02, PNorm = 175.5521, GNorm = 0.3109, lr_0 = 3.4900e-04
Loss = 1.2512e-02, PNorm = 175.5712, GNorm = 0.3213, lr_0 = 3.4876e-04
Loss = 1.2346e-02, PNorm = 175.5886, GNorm = 0.2378, lr_0 = 3.4852e-04
Loss = 9.5337e-03, PNorm = 175.6046, GNorm = 0.2057, lr_0 = 3.4828e-04
Loss = 2.2029e-02, PNorm = 175.6189, GNorm = 0.3745, lr_0 = 3.4805e-04
Loss = 2.2124e-02, PNorm = 175.6325, GNorm = 0.2296, lr_0 = 3.4781e-04
Loss = 1.4702e-02, PNorm = 175.6507, GNorm = 0.6063, lr_0 = 3.4757e-04
Loss = 1.3653e-02, PNorm = 175.6700, GNorm = 0.2554, lr_0 = 3.4733e-04
Loss = 1.0731e-02, PNorm = 175.6890, GNorm = 0.1765, lr_0 = 3.4709e-04
Loss = 1.8525e-02, PNorm = 175.7061, GNorm = 0.1955, lr_0 = 3.4686e-04
Loss = 1.3018e-02, PNorm = 175.7225, GNorm = 0.2115, lr_0 = 3.4662e-04
Loss = 9.9997e-03, PNorm = 175.7367, GNorm = 0.1619, lr_0 = 3.4638e-04
Loss = 1.0838e-02, PNorm = 175.7508, GNorm = 0.2305, lr_0 = 3.4614e-04
Loss = 1.1863e-02, PNorm = 175.7665, GNorm = 0.2840, lr_0 = 3.4591e-04
Loss = 2.1677e-02, PNorm = 175.7856, GNorm = 0.2646, lr_0 = 3.4567e-04
Loss = 1.1059e-02, PNorm = 175.8008, GNorm = 0.4306, lr_0 = 3.4543e-04
Loss = 1.0577e-02, PNorm = 175.8154, GNorm = 0.3437, lr_0 = 3.4520e-04
Loss = 1.3102e-02, PNorm = 175.8282, GNorm = 0.3022, lr_0 = 3.4496e-04
Loss = 1.1472e-02, PNorm = 175.8410, GNorm = 0.2545, lr_0 = 3.4472e-04
Loss = 1.0581e-02, PNorm = 175.8538, GNorm = 0.3077, lr_0 = 3.4449e-04
Loss = 1.7290e-02, PNorm = 175.8679, GNorm = 0.1883, lr_0 = 3.4425e-04
Loss = 1.4202e-02, PNorm = 175.8822, GNorm = 0.1972, lr_0 = 3.4402e-04
Loss = 1.4653e-02, PNorm = 175.8972, GNorm = 0.4031, lr_0 = 3.4378e-04
Loss = 1.1346e-02, PNorm = 175.9148, GNorm = 0.1631, lr_0 = 3.4354e-04
Loss = 1.4942e-02, PNorm = 175.9338, GNorm = 0.2605, lr_0 = 3.4331e-04
Validation mae = 0.122378
Epoch 15
Loss = 1.4561e-02, PNorm = 175.9490, GNorm = 0.5749, lr_0 = 3.4307e-04
Loss = 1.5128e-02, PNorm = 175.9632, GNorm = 0.3091, lr_0 = 3.4284e-04
Loss = 1.2077e-02, PNorm = 175.9778, GNorm = 0.4417, lr_0 = 3.4260e-04
Loss = 1.5549e-02, PNorm = 175.9907, GNorm = 0.8533, lr_0 = 3.4237e-04
Loss = 1.0777e-02, PNorm = 176.0055, GNorm = 0.1470, lr_0 = 3.4213e-04
Loss = 1.1613e-02, PNorm = 176.0147, GNorm = 0.1336, lr_0 = 3.4190e-04
Loss = 1.3184e-02, PNorm = 176.0252, GNorm = 0.2714, lr_0 = 3.4167e-04
Loss = 1.0203e-02, PNorm = 176.0372, GNorm = 0.2887, lr_0 = 3.4143e-04
Loss = 9.9131e-03, PNorm = 176.0475, GNorm = 0.2452, lr_0 = 3.4120e-04
Loss = 1.1810e-02, PNorm = 176.0580, GNorm = 0.3161, lr_0 = 3.4096e-04
Loss = 1.1455e-02, PNorm = 176.0673, GNorm = 0.2338, lr_0 = 3.4073e-04
Loss = 9.6144e-03, PNorm = 176.0807, GNorm = 0.5104, lr_0 = 3.4050e-04
Loss = 1.1845e-02, PNorm = 176.0924, GNorm = 0.6433, lr_0 = 3.4026e-04
Loss = 1.0757e-02, PNorm = 176.1044, GNorm = 0.2709, lr_0 = 3.4003e-04
Loss = 1.1683e-02, PNorm = 176.1195, GNorm = 0.3211, lr_0 = 3.3980e-04
Loss = 1.2554e-02, PNorm = 176.1345, GNorm = 0.1993, lr_0 = 3.3956e-04
Loss = 1.2963e-02, PNorm = 176.1472, GNorm = 0.3324, lr_0 = 3.3933e-04
Loss = 1.3158e-02, PNorm = 176.1598, GNorm = 0.1848, lr_0 = 3.3910e-04
Loss = 1.0727e-02, PNorm = 176.1699, GNorm = 0.1361, lr_0 = 3.3887e-04
Loss = 1.0983e-02, PNorm = 176.1824, GNorm = 0.3299, lr_0 = 3.3864e-04
Loss = 1.0575e-02, PNorm = 176.1931, GNorm = 0.2063, lr_0 = 3.3840e-04
Loss = 1.1816e-02, PNorm = 176.2061, GNorm = 0.2227, lr_0 = 3.3817e-04
Loss = 1.0189e-02, PNorm = 176.2162, GNorm = 0.3409, lr_0 = 3.3794e-04
Loss = 1.0497e-02, PNorm = 176.2265, GNorm = 0.1569, lr_0 = 3.3771e-04
Loss = 9.0679e-03, PNorm = 176.2364, GNorm = 0.1983, lr_0 = 3.3748e-04
Loss = 1.3437e-02, PNorm = 176.2491, GNorm = 0.4920, lr_0 = 3.3725e-04
Loss = 1.2544e-02, PNorm = 176.2624, GNorm = 0.3268, lr_0 = 3.3701e-04
Loss = 1.2158e-02, PNorm = 176.2760, GNorm = 0.1954, lr_0 = 3.3678e-04
Loss = 1.6921e-02, PNorm = 176.2899, GNorm = 0.2418, lr_0 = 3.3655e-04
Loss = 1.0533e-02, PNorm = 176.3022, GNorm = 0.4354, lr_0 = 3.3632e-04
Loss = 1.2703e-02, PNorm = 176.3106, GNorm = 0.3322, lr_0 = 3.3609e-04
Loss = 1.1985e-02, PNorm = 176.3232, GNorm = 0.1162, lr_0 = 3.3586e-04
Loss = 1.3920e-02, PNorm = 176.3359, GNorm = 0.4134, lr_0 = 3.3563e-04
Loss = 1.1839e-02, PNorm = 176.3477, GNorm = 0.3694, lr_0 = 3.3540e-04
Loss = 1.2965e-02, PNorm = 176.3596, GNorm = 0.4476, lr_0 = 3.3517e-04
Loss = 7.8877e-03, PNorm = 176.3728, GNorm = 0.2066, lr_0 = 3.3494e-04
Loss = 1.8983e-02, PNorm = 176.3883, GNorm = 0.2392, lr_0 = 3.3471e-04
Loss = 1.1121e-02, PNorm = 176.4018, GNorm = 0.3689, lr_0 = 3.3448e-04
Loss = 8.6536e-03, PNorm = 176.4164, GNorm = 0.0993, lr_0 = 3.3425e-04
Loss = 1.2842e-02, PNorm = 176.4288, GNorm = 0.1645, lr_0 = 3.3403e-04
Loss = 1.2611e-02, PNorm = 176.4375, GNorm = 0.1400, lr_0 = 3.3380e-04
Loss = 1.4347e-02, PNorm = 176.4477, GNorm = 0.3645, lr_0 = 3.3357e-04
Loss = 1.3413e-02, PNorm = 176.4591, GNorm = 0.2054, lr_0 = 3.3334e-04
Loss = 1.8153e-02, PNorm = 176.4709, GNorm = 0.2743, lr_0 = 3.3311e-04
Loss = 1.5888e-02, PNorm = 176.4831, GNorm = 0.5876, lr_0 = 3.3288e-04
Loss = 1.3991e-02, PNorm = 176.4986, GNorm = 0.1505, lr_0 = 3.3265e-04
Loss = 1.1000e-02, PNorm = 176.5133, GNorm = 0.5409, lr_0 = 3.3243e-04
Loss = 8.3990e-03, PNorm = 176.5272, GNorm = 0.2433, lr_0 = 3.3220e-04
Loss = 1.6160e-02, PNorm = 176.5444, GNorm = 0.2220, lr_0 = 3.3197e-04
Loss = 1.1622e-02, PNorm = 176.5607, GNorm = 0.2455, lr_0 = 3.3174e-04
Loss = 1.4466e-02, PNorm = 176.5745, GNorm = 0.2383, lr_0 = 3.3152e-04
Loss = 8.8785e-03, PNorm = 176.5869, GNorm = 0.2207, lr_0 = 3.3129e-04
Loss = 1.6417e-02, PNorm = 176.5984, GNorm = 0.4711, lr_0 = 3.3106e-04
Loss = 1.0083e-02, PNorm = 176.6116, GNorm = 0.1520, lr_0 = 3.3084e-04
Loss = 1.2545e-02, PNorm = 176.6243, GNorm = 0.2775, lr_0 = 3.3061e-04
Loss = 9.1208e-03, PNorm = 176.6361, GNorm = 0.3776, lr_0 = 3.3038e-04
Loss = 1.0153e-02, PNorm = 176.6485, GNorm = 0.2988, lr_0 = 3.3016e-04
Loss = 1.2387e-02, PNorm = 176.6609, GNorm = 0.4132, lr_0 = 3.2993e-04
Loss = 1.2513e-02, PNorm = 176.6735, GNorm = 0.2926, lr_0 = 3.2970e-04
Loss = 1.1942e-02, PNorm = 176.6887, GNorm = 0.3132, lr_0 = 3.2948e-04
Loss = 9.4868e-03, PNorm = 176.6984, GNorm = 0.3214, lr_0 = 3.2925e-04
Loss = 1.1061e-02, PNorm = 176.7102, GNorm = 0.1882, lr_0 = 3.2903e-04
Loss = 1.0705e-02, PNorm = 176.7199, GNorm = 0.3290, lr_0 = 3.2880e-04
Loss = 1.1862e-02, PNorm = 176.7310, GNorm = 0.1492, lr_0 = 3.2858e-04
Loss = 1.1781e-02, PNorm = 176.7444, GNorm = 0.2301, lr_0 = 3.2835e-04
Loss = 1.2658e-02, PNorm = 176.7541, GNorm = 0.3471, lr_0 = 3.2813e-04
Loss = 1.0894e-02, PNorm = 176.7683, GNorm = 0.1211, lr_0 = 3.2790e-04
Loss = 1.0793e-02, PNorm = 176.7804, GNorm = 0.2279, lr_0 = 3.2768e-04
Loss = 1.6598e-02, PNorm = 176.7952, GNorm = 0.1876, lr_0 = 3.2745e-04
Loss = 1.3443e-02, PNorm = 176.8056, GNorm = 0.3346, lr_0 = 3.2723e-04
Loss = 1.5744e-02, PNorm = 176.8201, GNorm = 0.2620, lr_0 = 3.2700e-04
Loss = 9.2017e-03, PNorm = 176.8346, GNorm = 0.2513, lr_0 = 3.2678e-04
Loss = 1.2998e-02, PNorm = 176.8486, GNorm = 0.3576, lr_0 = 3.2656e-04
Loss = 1.3055e-02, PNorm = 176.8625, GNorm = 0.1106, lr_0 = 3.2633e-04
Loss = 1.9084e-02, PNorm = 176.8765, GNorm = 0.5971, lr_0 = 3.2611e-04
Loss = 1.3875e-02, PNorm = 176.8927, GNorm = 0.2632, lr_0 = 3.2589e-04
Loss = 2.0408e-02, PNorm = 176.9085, GNorm = 0.4493, lr_0 = 3.2566e-04
Loss = 1.1493e-02, PNorm = 176.9231, GNorm = 0.2152, lr_0 = 3.2544e-04
Loss = 1.5284e-02, PNorm = 176.9354, GNorm = 0.3197, lr_0 = 3.2522e-04
Loss = 1.4364e-02, PNorm = 176.9484, GNorm = 0.2101, lr_0 = 3.2499e-04
Loss = 1.1222e-02, PNorm = 176.9592, GNorm = 0.1374, lr_0 = 3.2477e-04
Loss = 1.7521e-02, PNorm = 176.9732, GNorm = 0.2277, lr_0 = 3.2455e-04
Loss = 1.0925e-02, PNorm = 176.9872, GNorm = 0.3736, lr_0 = 3.2433e-04
Loss = 9.6936e-03, PNorm = 177.0021, GNorm = 0.1652, lr_0 = 3.2410e-04
Loss = 9.3124e-03, PNorm = 177.0166, GNorm = 0.3025, lr_0 = 3.2388e-04
Loss = 1.2245e-02, PNorm = 177.0301, GNorm = 0.1617, lr_0 = 3.2366e-04
Loss = 1.4273e-02, PNorm = 177.0448, GNorm = 0.3042, lr_0 = 3.2344e-04
Loss = 1.0546e-02, PNorm = 177.0579, GNorm = 0.1760, lr_0 = 3.2322e-04
Loss = 1.1646e-02, PNorm = 177.0712, GNorm = 0.1618, lr_0 = 3.2300e-04
Loss = 9.8163e-03, PNorm = 177.0847, GNorm = 0.2606, lr_0 = 3.2277e-04
Loss = 1.6982e-02, PNorm = 177.0982, GNorm = 0.2623, lr_0 = 3.2255e-04
Loss = 9.0196e-03, PNorm = 177.1107, GNorm = 0.2715, lr_0 = 3.2233e-04
Loss = 8.8837e-03, PNorm = 177.1256, GNorm = 0.1726, lr_0 = 3.2211e-04
Loss = 1.1120e-02, PNorm = 177.1414, GNorm = 0.5176, lr_0 = 3.2189e-04
Loss = 1.6787e-02, PNorm = 177.1534, GNorm = 0.2499, lr_0 = 3.2167e-04
Loss = 9.1325e-03, PNorm = 177.1666, GNorm = 0.1972, lr_0 = 3.2145e-04
Loss = 1.3024e-02, PNorm = 177.1795, GNorm = 0.3048, lr_0 = 3.2123e-04
Loss = 1.5927e-02, PNorm = 177.1920, GNorm = 0.1550, lr_0 = 3.2101e-04
Loss = 9.8471e-03, PNorm = 177.2020, GNorm = 0.2755, lr_0 = 3.2079e-04
Loss = 1.8654e-02, PNorm = 177.2168, GNorm = 0.3093, lr_0 = 3.2057e-04
Loss = 1.0891e-02, PNorm = 177.2308, GNorm = 0.4268, lr_0 = 3.2035e-04
Loss = 9.8120e-03, PNorm = 177.2446, GNorm = 0.1169, lr_0 = 3.2013e-04
Loss = 1.3295e-02, PNorm = 177.2588, GNorm = 0.1461, lr_0 = 3.1991e-04
Loss = 1.0609e-02, PNorm = 177.2730, GNorm = 0.1603, lr_0 = 3.1969e-04
Loss = 1.1777e-02, PNorm = 177.2880, GNorm = 0.4308, lr_0 = 3.1947e-04
Loss = 9.7951e-03, PNorm = 177.3031, GNorm = 0.2231, lr_0 = 3.1925e-04
Loss = 1.9251e-02, PNorm = 177.3217, GNorm = 0.1486, lr_0 = 3.1904e-04
Loss = 1.2385e-02, PNorm = 177.3367, GNorm = 0.3186, lr_0 = 3.1882e-04
Loss = 1.3025e-02, PNorm = 177.3475, GNorm = 0.3895, lr_0 = 3.1860e-04
Loss = 1.0685e-02, PNorm = 177.3608, GNorm = 0.1953, lr_0 = 3.1838e-04
Loss = 1.2083e-02, PNorm = 177.3721, GNorm = 0.2585, lr_0 = 3.1816e-04
Loss = 9.6079e-03, PNorm = 177.3863, GNorm = 0.2549, lr_0 = 3.1794e-04
Loss = 1.4622e-02, PNorm = 177.3979, GNorm = 0.6697, lr_0 = 3.1773e-04
Loss = 9.6338e-03, PNorm = 177.4100, GNorm = 0.2254, lr_0 = 3.1751e-04
Loss = 9.6498e-03, PNorm = 177.4239, GNorm = 0.3442, lr_0 = 3.1729e-04
Loss = 1.0531e-02, PNorm = 177.4381, GNorm = 0.3795, lr_0 = 3.1707e-04
Loss = 1.1321e-02, PNorm = 177.4536, GNorm = 0.2527, lr_0 = 3.1686e-04
Loss = 1.5528e-02, PNorm = 177.4658, GNorm = 1.3570, lr_0 = 3.1664e-04
Loss = 1.5517e-02, PNorm = 177.4814, GNorm = 0.1467, lr_0 = 3.1642e-04
Loss = 1.4681e-02, PNorm = 177.4953, GNorm = 0.2274, lr_0 = 3.1621e-04
Validation mae = 0.122049
Epoch 16
Loss = 9.5114e-03, PNorm = 177.5092, GNorm = 0.1482, lr_0 = 3.1599e-04
Loss = 1.0285e-02, PNorm = 177.5198, GNorm = 0.2574, lr_0 = 3.1577e-04
Loss = 1.2236e-02, PNorm = 177.5281, GNorm = 0.1887, lr_0 = 3.1556e-04
Loss = 1.0938e-02, PNorm = 177.5355, GNorm = 0.2311, lr_0 = 3.1534e-04
Loss = 9.5346e-03, PNorm = 177.5460, GNorm = 0.2324, lr_0 = 3.1512e-04
Loss = 9.0892e-03, PNorm = 177.5572, GNorm = 0.2403, lr_0 = 3.1491e-04
Loss = 1.1410e-02, PNorm = 177.5666, GNorm = 0.1159, lr_0 = 3.1469e-04
Loss = 8.3065e-03, PNorm = 177.5761, GNorm = 0.1039, lr_0 = 3.1448e-04
Loss = 8.9868e-03, PNorm = 177.5866, GNorm = 0.2749, lr_0 = 3.1426e-04
Loss = 1.2177e-02, PNorm = 177.5957, GNorm = 0.2183, lr_0 = 3.1405e-04
Loss = 8.9169e-03, PNorm = 177.6053, GNorm = 0.1931, lr_0 = 3.1383e-04
Loss = 9.3952e-03, PNorm = 177.6129, GNorm = 0.2333, lr_0 = 3.1362e-04
Loss = 1.0581e-02, PNorm = 177.6213, GNorm = 0.2682, lr_0 = 3.1340e-04
Loss = 7.2706e-03, PNorm = 177.6306, GNorm = 0.1747, lr_0 = 3.1319e-04
Loss = 1.1126e-02, PNorm = 177.6396, GNorm = 0.1889, lr_0 = 3.1297e-04
Loss = 1.1394e-02, PNorm = 177.6530, GNorm = 0.2772, lr_0 = 3.1276e-04
Loss = 1.1300e-02, PNorm = 177.6643, GNorm = 0.1734, lr_0 = 3.1254e-04
Loss = 8.5880e-03, PNorm = 177.6751, GNorm = 0.2143, lr_0 = 3.1233e-04
Loss = 1.0212e-02, PNorm = 177.6853, GNorm = 0.3076, lr_0 = 3.1212e-04
Loss = 8.5590e-03, PNorm = 177.6968, GNorm = 0.1562, lr_0 = 3.1190e-04
Loss = 9.8213e-03, PNorm = 177.7055, GNorm = 0.1077, lr_0 = 3.1169e-04
Loss = 8.3737e-03, PNorm = 177.7153, GNorm = 0.2017, lr_0 = 3.1147e-04
Loss = 1.3774e-02, PNorm = 177.7251, GNorm = 0.0869, lr_0 = 3.1126e-04
Loss = 1.2805e-02, PNorm = 177.7328, GNorm = 0.3854, lr_0 = 3.1105e-04
Loss = 9.3252e-03, PNorm = 177.7442, GNorm = 0.1183, lr_0 = 3.1083e-04
Loss = 1.2020e-02, PNorm = 177.7541, GNorm = 0.3080, lr_0 = 3.1062e-04
Loss = 7.3684e-03, PNorm = 177.7625, GNorm = 0.0989, lr_0 = 3.1041e-04
Loss = 1.4611e-02, PNorm = 177.7714, GNorm = 0.1717, lr_0 = 3.1020e-04
Loss = 8.9838e-03, PNorm = 177.7781, GNorm = 0.3907, lr_0 = 3.0998e-04
Loss = 8.7966e-03, PNorm = 177.7880, GNorm = 0.1866, lr_0 = 3.0977e-04
Loss = 1.0444e-02, PNorm = 177.7972, GNorm = 0.2481, lr_0 = 3.0956e-04
Loss = 9.9920e-03, PNorm = 177.8060, GNorm = 0.1412, lr_0 = 3.0935e-04
Loss = 1.6547e-02, PNorm = 177.8196, GNorm = 0.2416, lr_0 = 3.0914e-04
Loss = 1.6211e-02, PNorm = 177.8293, GNorm = 0.5958, lr_0 = 3.0892e-04
Loss = 1.2960e-02, PNorm = 177.8419, GNorm = 0.8910, lr_0 = 3.0871e-04
Loss = 9.2814e-03, PNorm = 177.8580, GNorm = 0.3839, lr_0 = 3.0850e-04
Loss = 1.0016e-02, PNorm = 177.8703, GNorm = 0.5245, lr_0 = 3.0829e-04
Loss = 1.0450e-02, PNorm = 177.8841, GNorm = 0.2403, lr_0 = 3.0808e-04
Loss = 1.1082e-02, PNorm = 177.8953, GNorm = 0.3289, lr_0 = 3.0787e-04
Loss = 1.1132e-02, PNorm = 177.9034, GNorm = 0.3594, lr_0 = 3.0766e-04
Loss = 8.5139e-03, PNorm = 177.9148, GNorm = 0.1486, lr_0 = 3.0745e-04
Loss = 1.2543e-02, PNorm = 177.9271, GNorm = 0.1705, lr_0 = 3.0723e-04
Loss = 1.2314e-02, PNorm = 177.9370, GNorm = 1.4229, lr_0 = 3.0702e-04
Loss = 9.7645e-03, PNorm = 177.9523, GNorm = 0.1770, lr_0 = 3.0681e-04
Loss = 9.8952e-03, PNorm = 177.9663, GNorm = 0.1310, lr_0 = 3.0660e-04
Loss = 1.0524e-02, PNorm = 177.9804, GNorm = 0.2205, lr_0 = 3.0639e-04
Loss = 1.2965e-02, PNorm = 177.9920, GNorm = 0.3159, lr_0 = 3.0618e-04
Loss = 1.0584e-02, PNorm = 177.9988, GNorm = 0.3244, lr_0 = 3.0597e-04
Loss = 7.5375e-03, PNorm = 178.0081, GNorm = 0.2589, lr_0 = 3.0576e-04
Loss = 8.7727e-03, PNorm = 178.0185, GNorm = 0.1950, lr_0 = 3.0555e-04
Loss = 8.1302e-03, PNorm = 178.0275, GNorm = 0.3093, lr_0 = 3.0535e-04
Loss = 8.1962e-03, PNorm = 178.0380, GNorm = 0.3498, lr_0 = 3.0514e-04
Loss = 1.5281e-02, PNorm = 178.0487, GNorm = 0.3423, lr_0 = 3.0493e-04
Loss = 9.3339e-03, PNorm = 178.0592, GNorm = 0.1911, lr_0 = 3.0472e-04
Loss = 1.1438e-02, PNorm = 178.0688, GNorm = 0.2396, lr_0 = 3.0451e-04
Loss = 9.1106e-03, PNorm = 178.0801, GNorm = 0.1459, lr_0 = 3.0430e-04
Loss = 1.2945e-02, PNorm = 178.0933, GNorm = 0.3240, lr_0 = 3.0409e-04
Loss = 1.8544e-02, PNorm = 178.1071, GNorm = 0.1905, lr_0 = 3.0388e-04
Loss = 1.2736e-02, PNorm = 178.1176, GNorm = 0.6042, lr_0 = 3.0368e-04
Loss = 1.0034e-02, PNorm = 178.1277, GNorm = 0.3110, lr_0 = 3.0347e-04
Loss = 1.0028e-02, PNorm = 178.1385, GNorm = 0.3652, lr_0 = 3.0326e-04
Loss = 1.2755e-02, PNorm = 178.1465, GNorm = 0.6007, lr_0 = 3.0305e-04
Loss = 1.0778e-02, PNorm = 178.1622, GNorm = 0.3996, lr_0 = 3.0284e-04
Loss = 7.7678e-03, PNorm = 178.1730, GNorm = 0.1951, lr_0 = 3.0264e-04
Loss = 9.9384e-03, PNorm = 178.1844, GNorm = 0.1499, lr_0 = 3.0243e-04
Loss = 1.4381e-02, PNorm = 178.1953, GNorm = 0.4054, lr_0 = 3.0222e-04
Loss = 1.1258e-02, PNorm = 178.2077, GNorm = 0.1951, lr_0 = 3.0202e-04
Loss = 9.2011e-03, PNorm = 178.2207, GNorm = 0.1174, lr_0 = 3.0181e-04
Loss = 1.0077e-02, PNorm = 178.2345, GNorm = 0.2315, lr_0 = 3.0160e-04
Loss = 1.7399e-02, PNorm = 178.2440, GNorm = 0.7841, lr_0 = 3.0140e-04
Loss = 1.0784e-02, PNorm = 178.2539, GNorm = 0.1629, lr_0 = 3.0119e-04
Loss = 8.6312e-03, PNorm = 178.2646, GNorm = 0.2166, lr_0 = 3.0098e-04
Loss = 9.9785e-03, PNorm = 178.2753, GNorm = 0.2351, lr_0 = 3.0078e-04
Loss = 1.4915e-02, PNorm = 178.2883, GNorm = 0.1437, lr_0 = 3.0057e-04
Loss = 7.0069e-03, PNorm = 178.3017, GNorm = 0.1344, lr_0 = 3.0036e-04
Loss = 1.3975e-02, PNorm = 178.3139, GNorm = 0.6194, lr_0 = 3.0016e-04
Loss = 7.2992e-03, PNorm = 178.3254, GNorm = 0.2189, lr_0 = 2.9995e-04
Loss = 7.7002e-03, PNorm = 178.3382, GNorm = 0.1124, lr_0 = 2.9975e-04
Loss = 1.0222e-02, PNorm = 178.3520, GNorm = 0.6656, lr_0 = 2.9954e-04
Loss = 9.4378e-03, PNorm = 178.3631, GNorm = 0.1251, lr_0 = 2.9934e-04
Loss = 1.1075e-02, PNorm = 178.3717, GNorm = 0.1860, lr_0 = 2.9913e-04
Loss = 9.4533e-03, PNorm = 178.3817, GNorm = 0.1678, lr_0 = 2.9893e-04
Loss = 1.2316e-02, PNorm = 178.3941, GNorm = 0.2805, lr_0 = 2.9872e-04
Loss = 9.2370e-03, PNorm = 178.4077, GNorm = 0.1587, lr_0 = 2.9852e-04
Loss = 9.4400e-03, PNorm = 178.4200, GNorm = 0.1492, lr_0 = 2.9831e-04
Loss = 1.3674e-02, PNorm = 178.4286, GNorm = 0.2085, lr_0 = 2.9811e-04
Loss = 1.0212e-02, PNorm = 178.4396, GNorm = 0.3687, lr_0 = 2.9790e-04
Loss = 9.0301e-03, PNorm = 178.4531, GNorm = 0.2327, lr_0 = 2.9770e-04
Loss = 8.9044e-03, PNorm = 178.4647, GNorm = 0.1643, lr_0 = 2.9750e-04
Loss = 7.1749e-03, PNorm = 178.4764, GNorm = 0.1070, lr_0 = 2.9729e-04
Loss = 1.5103e-02, PNorm = 178.4882, GNorm = 0.2189, lr_0 = 2.9709e-04
Loss = 1.1474e-02, PNorm = 178.5003, GNorm = 0.1927, lr_0 = 2.9689e-04
Loss = 9.6831e-03, PNorm = 178.5120, GNorm = 0.1791, lr_0 = 2.9668e-04
Loss = 9.8566e-03, PNorm = 178.5238, GNorm = 0.1759, lr_0 = 2.9648e-04
Loss = 8.5424e-03, PNorm = 178.5322, GNorm = 0.2392, lr_0 = 2.9628e-04
Loss = 7.6214e-03, PNorm = 178.5424, GNorm = 0.2724, lr_0 = 2.9607e-04
Loss = 7.6364e-03, PNorm = 178.5502, GNorm = 0.2033, lr_0 = 2.9587e-04
Loss = 8.7051e-03, PNorm = 178.5609, GNorm = 0.2526, lr_0 = 2.9567e-04
Loss = 9.5864e-03, PNorm = 178.5731, GNorm = 0.1828, lr_0 = 2.9546e-04
Loss = 1.0784e-02, PNorm = 178.5855, GNorm = 0.2467, lr_0 = 2.9526e-04
Loss = 1.9102e-02, PNorm = 178.5950, GNorm = 0.4641, lr_0 = 2.9506e-04
Loss = 1.1187e-02, PNorm = 178.6062, GNorm = 0.1844, lr_0 = 2.9486e-04
Loss = 8.0426e-03, PNorm = 178.6183, GNorm = 0.1515, lr_0 = 2.9466e-04
Loss = 1.4429e-02, PNorm = 178.6293, GNorm = 0.2451, lr_0 = 2.9445e-04
Loss = 7.9516e-03, PNorm = 178.6422, GNorm = 0.1928, lr_0 = 2.9425e-04
Loss = 1.3351e-02, PNorm = 178.6512, GNorm = 0.3692, lr_0 = 2.9405e-04
Loss = 8.0924e-03, PNorm = 178.6599, GNorm = 0.2041, lr_0 = 2.9385e-04
Loss = 9.0169e-03, PNorm = 178.6699, GNorm = 0.1806, lr_0 = 2.9365e-04
Loss = 2.1967e-02, PNorm = 178.6848, GNorm = 0.3378, lr_0 = 2.9345e-04
Loss = 1.0208e-02, PNorm = 178.6971, GNorm = 0.2885, lr_0 = 2.9325e-04
Loss = 1.4247e-02, PNorm = 178.7084, GNorm = 0.2138, lr_0 = 2.9305e-04
Loss = 1.1623e-02, PNorm = 178.7222, GNorm = 0.2091, lr_0 = 2.9284e-04
Loss = 1.0376e-02, PNorm = 178.7344, GNorm = 0.2508, lr_0 = 2.9264e-04
Loss = 1.1101e-02, PNorm = 178.7461, GNorm = 0.1380, lr_0 = 2.9244e-04
Loss = 9.5545e-03, PNorm = 178.7565, GNorm = 0.3075, lr_0 = 2.9224e-04
Loss = 8.1515e-03, PNorm = 178.7673, GNorm = 0.3077, lr_0 = 2.9204e-04
Loss = 9.5520e-03, PNorm = 178.7784, GNorm = 0.3071, lr_0 = 2.9184e-04
Loss = 1.7851e-02, PNorm = 178.7893, GNorm = 0.5249, lr_0 = 2.9164e-04
Loss = 1.0482e-02, PNorm = 178.8009, GNorm = 0.3219, lr_0 = 2.9144e-04
Loss = 1.0506e-02, PNorm = 178.8130, GNorm = 0.6006, lr_0 = 2.9124e-04
Validation mae = 0.121354
Epoch 17
Loss = 7.1689e-03, PNorm = 178.8252, GNorm = 0.2876, lr_0 = 2.9104e-04
Loss = 1.0058e-02, PNorm = 178.8354, GNorm = 0.1894, lr_0 = 2.9084e-04
Loss = 6.4535e-03, PNorm = 178.8458, GNorm = 0.5640, lr_0 = 2.9065e-04
Loss = 7.6445e-03, PNorm = 178.8523, GNorm = 0.2316, lr_0 = 2.9045e-04
Loss = 8.1269e-03, PNorm = 178.8575, GNorm = 0.2858, lr_0 = 2.9025e-04
Loss = 9.1774e-03, PNorm = 178.8636, GNorm = 0.2799, lr_0 = 2.9005e-04
Loss = 1.0993e-02, PNorm = 178.8698, GNorm = 0.1078, lr_0 = 2.8985e-04
Loss = 9.5337e-03, PNorm = 178.8786, GNorm = 0.2131, lr_0 = 2.8965e-04
Loss = 8.2275e-03, PNorm = 178.8856, GNorm = 0.2488, lr_0 = 2.8945e-04
Loss = 7.2858e-03, PNorm = 178.8932, GNorm = 0.1862, lr_0 = 2.8925e-04
Loss = 9.1277e-03, PNorm = 178.9012, GNorm = 0.1649, lr_0 = 2.8906e-04
Loss = 1.0009e-02, PNorm = 178.9111, GNorm = 0.2020, lr_0 = 2.8886e-04
Loss = 9.4209e-03, PNorm = 178.9176, GNorm = 0.3010, lr_0 = 2.8866e-04
Loss = 7.2336e-03, PNorm = 178.9242, GNorm = 0.2791, lr_0 = 2.8846e-04
Loss = 7.9142e-03, PNorm = 178.9326, GNorm = 0.3311, lr_0 = 2.8826e-04
Loss = 1.3608e-02, PNorm = 178.9422, GNorm = 0.3105, lr_0 = 2.8807e-04
Loss = 1.1692e-02, PNorm = 178.9537, GNorm = 0.2341, lr_0 = 2.8787e-04
Loss = 7.3356e-03, PNorm = 178.9624, GNorm = 0.1648, lr_0 = 2.8767e-04
Loss = 1.0165e-02, PNorm = 178.9711, GNorm = 0.1023, lr_0 = 2.8748e-04
Loss = 8.0301e-03, PNorm = 178.9785, GNorm = 0.1508, lr_0 = 2.8728e-04
Loss = 8.1272e-03, PNorm = 178.9846, GNorm = 0.3567, lr_0 = 2.8708e-04
Loss = 1.2942e-02, PNorm = 178.9917, GNorm = 0.2878, lr_0 = 2.8689e-04
Loss = 1.0045e-02, PNorm = 179.0005, GNorm = 0.3534, lr_0 = 2.8669e-04
Loss = 7.9639e-03, PNorm = 179.0076, GNorm = 0.1675, lr_0 = 2.8649e-04
Loss = 1.3594e-02, PNorm = 179.0179, GNorm = 0.1276, lr_0 = 2.8630e-04
Loss = 9.1517e-03, PNorm = 179.0281, GNorm = 0.1986, lr_0 = 2.8610e-04
Loss = 7.1521e-03, PNorm = 179.0403, GNorm = 0.1943, lr_0 = 2.8590e-04
Loss = 1.0213e-02, PNorm = 179.0489, GNorm = 0.1662, lr_0 = 2.8571e-04
Loss = 6.5160e-03, PNorm = 179.0579, GNorm = 0.2384, lr_0 = 2.8551e-04
Loss = 8.0843e-03, PNorm = 179.0647, GNorm = 0.1384, lr_0 = 2.8532e-04
Loss = 7.3933e-03, PNorm = 179.0744, GNorm = 0.3005, lr_0 = 2.8512e-04
Loss = 9.0545e-03, PNorm = 179.0829, GNorm = 0.1697, lr_0 = 2.8493e-04
Loss = 7.4188e-03, PNorm = 179.0929, GNorm = 0.4092, lr_0 = 2.8473e-04
Loss = 5.3514e-03, PNorm = 179.1018, GNorm = 0.1850, lr_0 = 2.8454e-04
Loss = 7.6787e-03, PNorm = 179.1120, GNorm = 0.1810, lr_0 = 2.8434e-04
Loss = 7.9230e-03, PNorm = 179.1200, GNorm = 0.2530, lr_0 = 2.8415e-04
Loss = 9.0373e-03, PNorm = 179.1293, GNorm = 0.2112, lr_0 = 2.8395e-04
Loss = 1.0115e-02, PNorm = 179.1386, GNorm = 0.2400, lr_0 = 2.8376e-04
Loss = 9.5952e-03, PNorm = 179.1462, GNorm = 0.1451, lr_0 = 2.8356e-04
Loss = 7.6599e-03, PNorm = 179.1534, GNorm = 0.1785, lr_0 = 2.8337e-04
Loss = 7.2735e-03, PNorm = 179.1633, GNorm = 0.1378, lr_0 = 2.8317e-04
Loss = 6.1968e-03, PNorm = 179.1722, GNorm = 0.2229, lr_0 = 2.8298e-04
Loss = 1.7368e-02, PNorm = 179.1850, GNorm = 0.3362, lr_0 = 2.8279e-04
Loss = 7.1742e-03, PNorm = 179.1929, GNorm = 0.2369, lr_0 = 2.8259e-04
Loss = 7.2971e-03, PNorm = 179.2031, GNorm = 0.2643, lr_0 = 2.8240e-04
Loss = 8.4771e-03, PNorm = 179.2130, GNorm = 0.3084, lr_0 = 2.8221e-04
Loss = 8.2895e-03, PNorm = 179.2214, GNorm = 0.3145, lr_0 = 2.8201e-04
Loss = 7.8371e-03, PNorm = 179.2296, GNorm = 0.1463, lr_0 = 2.8182e-04
Loss = 1.1502e-02, PNorm = 179.2366, GNorm = 0.1451, lr_0 = 2.8163e-04
Loss = 9.4252e-03, PNorm = 179.2455, GNorm = 0.1559, lr_0 = 2.8143e-04
Loss = 1.2703e-02, PNorm = 179.2536, GNorm = 0.1839, lr_0 = 2.8124e-04
Loss = 1.2449e-02, PNorm = 179.2618, GNorm = 0.3776, lr_0 = 2.8105e-04
Loss = 7.2676e-03, PNorm = 179.2708, GNorm = 0.5630, lr_0 = 2.8085e-04
Loss = 1.4657e-02, PNorm = 179.2807, GNorm = 0.1806, lr_0 = 2.8066e-04
Loss = 7.6335e-03, PNorm = 179.2924, GNorm = 0.1946, lr_0 = 2.8047e-04
Loss = 9.8433e-03, PNorm = 179.3003, GNorm = 0.2863, lr_0 = 2.8028e-04
Loss = 1.1670e-02, PNorm = 179.3073, GNorm = 0.1429, lr_0 = 2.8009e-04
Loss = 1.0006e-02, PNorm = 179.3170, GNorm = 0.3051, lr_0 = 2.7989e-04
Loss = 1.0405e-02, PNorm = 179.3243, GNorm = 0.2717, lr_0 = 2.7970e-04
Loss = 1.1554e-02, PNorm = 179.3347, GNorm = 0.1132, lr_0 = 2.7951e-04
Loss = 6.8089e-03, PNorm = 179.3456, GNorm = 0.1049, lr_0 = 2.7932e-04
Loss = 6.5041e-03, PNorm = 179.3568, GNorm = 0.2041, lr_0 = 2.7913e-04
Loss = 6.7073e-03, PNorm = 179.3649, GNorm = 0.1751, lr_0 = 2.7894e-04
Loss = 7.2821e-03, PNorm = 179.3725, GNorm = 0.1227, lr_0 = 2.7875e-04
Loss = 7.7850e-03, PNorm = 179.3795, GNorm = 0.2514, lr_0 = 2.7855e-04
Loss = 8.7705e-03, PNorm = 179.3860, GNorm = 0.1863, lr_0 = 2.7836e-04
Loss = 1.0429e-02, PNorm = 179.3938, GNorm = 0.2663, lr_0 = 2.7817e-04
Loss = 9.4352e-03, PNorm = 179.4004, GNorm = 0.2981, lr_0 = 2.7798e-04
Loss = 1.0346e-02, PNorm = 179.4069, GNorm = 0.2257, lr_0 = 2.7779e-04
Loss = 1.0745e-02, PNorm = 179.4183, GNorm = 0.4089, lr_0 = 2.7760e-04
Loss = 8.5594e-03, PNorm = 179.4276, GNorm = 0.2641, lr_0 = 2.7741e-04
Loss = 7.2864e-03, PNorm = 179.4369, GNorm = 0.2422, lr_0 = 2.7722e-04
Loss = 1.0978e-02, PNorm = 179.4463, GNorm = 0.4067, lr_0 = 2.7703e-04
Loss = 8.9581e-03, PNorm = 179.4565, GNorm = 0.3240, lr_0 = 2.7684e-04
Loss = 6.2414e-03, PNorm = 179.4687, GNorm = 0.1288, lr_0 = 2.7665e-04
Loss = 1.0073e-02, PNorm = 179.4798, GNorm = 0.1476, lr_0 = 2.7646e-04
Loss = 8.1980e-03, PNorm = 179.4904, GNorm = 0.4373, lr_0 = 2.7627e-04
Loss = 6.6128e-03, PNorm = 179.5004, GNorm = 0.1143, lr_0 = 2.7608e-04
Loss = 7.5720e-03, PNorm = 179.5088, GNorm = 0.3373, lr_0 = 2.7590e-04
Loss = 1.1979e-02, PNorm = 179.5185, GNorm = 0.1832, lr_0 = 2.7571e-04
Loss = 1.2365e-02, PNorm = 179.5276, GNorm = 0.4672, lr_0 = 2.7552e-04
Loss = 9.3745e-03, PNorm = 179.5376, GNorm = 0.2251, lr_0 = 2.7533e-04
Loss = 6.8344e-03, PNorm = 179.5477, GNorm = 0.1694, lr_0 = 2.7514e-04
Loss = 9.7307e-03, PNorm = 179.5572, GNorm = 0.1393, lr_0 = 2.7495e-04
Loss = 7.9341e-03, PNorm = 179.5672, GNorm = 0.4351, lr_0 = 2.7476e-04
Loss = 8.5759e-03, PNorm = 179.5761, GNorm = 0.2979, lr_0 = 2.7457e-04
Loss = 7.3347e-03, PNorm = 179.5853, GNorm = 0.1661, lr_0 = 2.7439e-04
Loss = 7.5179e-03, PNorm = 179.5934, GNorm = 0.1122, lr_0 = 2.7420e-04
Loss = 7.4739e-03, PNorm = 179.6013, GNorm = 0.2563, lr_0 = 2.7401e-04
Loss = 7.1332e-03, PNorm = 179.6111, GNorm = 0.1334, lr_0 = 2.7382e-04
Loss = 1.0496e-02, PNorm = 179.6197, GNorm = 0.2192, lr_0 = 2.7364e-04
Loss = 1.0307e-02, PNorm = 179.6312, GNorm = 0.1941, lr_0 = 2.7345e-04
Loss = 8.6086e-03, PNorm = 179.6425, GNorm = 0.3386, lr_0 = 2.7326e-04
Loss = 1.2789e-02, PNorm = 179.6538, GNorm = 0.2372, lr_0 = 2.7307e-04
Loss = 7.1190e-03, PNorm = 179.6630, GNorm = 0.2309, lr_0 = 2.7289e-04
Loss = 1.7653e-02, PNorm = 179.6755, GNorm = 0.4402, lr_0 = 2.7270e-04
Loss = 9.8674e-03, PNorm = 179.6844, GNorm = 0.1358, lr_0 = 2.7251e-04
Loss = 1.0068e-02, PNorm = 179.6933, GNorm = 0.1455, lr_0 = 2.7233e-04
Loss = 7.8567e-03, PNorm = 179.7034, GNorm = 0.1904, lr_0 = 2.7214e-04
Loss = 1.0016e-02, PNorm = 179.7101, GNorm = 0.2040, lr_0 = 2.7195e-04
Loss = 1.3147e-02, PNorm = 179.7153, GNorm = 0.4025, lr_0 = 2.7177e-04
Loss = 7.9680e-03, PNorm = 179.7226, GNorm = 0.1650, lr_0 = 2.7158e-04
Loss = 7.5988e-03, PNorm = 179.7337, GNorm = 0.1799, lr_0 = 2.7139e-04
Loss = 7.3966e-03, PNorm = 179.7420, GNorm = 0.2155, lr_0 = 2.7121e-04
Loss = 7.3394e-03, PNorm = 179.7499, GNorm = 0.2178, lr_0 = 2.7102e-04
Loss = 6.5246e-03, PNorm = 179.7613, GNorm = 0.1555, lr_0 = 2.7084e-04
Loss = 1.1756e-02, PNorm = 179.7740, GNorm = 0.4055, lr_0 = 2.7065e-04
Loss = 6.7497e-03, PNorm = 179.7825, GNorm = 0.1674, lr_0 = 2.7047e-04
Loss = 8.2214e-03, PNorm = 179.7911, GNorm = 0.1914, lr_0 = 2.7028e-04
Loss = 6.7213e-03, PNorm = 179.8014, GNorm = 0.1657, lr_0 = 2.7010e-04
Loss = 8.6809e-03, PNorm = 179.8101, GNorm = 0.2650, lr_0 = 2.6991e-04
Loss = 1.0793e-02, PNorm = 179.8199, GNorm = 0.2582, lr_0 = 2.6973e-04
Loss = 7.9758e-03, PNorm = 179.8308, GNorm = 0.1285, lr_0 = 2.6954e-04
Loss = 7.2806e-03, PNorm = 179.8399, GNorm = 0.1734, lr_0 = 2.6936e-04
Loss = 1.4629e-02, PNorm = 179.8500, GNorm = 0.1721, lr_0 = 2.6917e-04
Loss = 1.5399e-02, PNorm = 179.8569, GNorm = 0.2706, lr_0 = 2.6899e-04
Loss = 1.3095e-02, PNorm = 179.8655, GNorm = 0.2223, lr_0 = 2.6880e-04
Loss = 8.6372e-03, PNorm = 179.8745, GNorm = 0.2948, lr_0 = 2.6862e-04
Loss = 8.2303e-03, PNorm = 179.8833, GNorm = 0.1290, lr_0 = 2.6844e-04
Loss = 1.2423e-02, PNorm = 179.8945, GNorm = 0.3639, lr_0 = 2.6825e-04
Validation mae = 0.121966
Epoch 18
Loss = 6.4359e-03, PNorm = 179.9048, GNorm = 0.0930, lr_0 = 2.6807e-04
Loss = 5.8525e-03, PNorm = 179.9135, GNorm = 0.1923, lr_0 = 2.6788e-04
Loss = 1.3049e-02, PNorm = 179.9213, GNorm = 0.3303, lr_0 = 2.6770e-04
Loss = 7.3894e-03, PNorm = 179.9266, GNorm = 0.1886, lr_0 = 2.6752e-04
Loss = 6.6384e-03, PNorm = 179.9316, GNorm = 0.3361, lr_0 = 2.6733e-04
Loss = 6.4320e-03, PNorm = 179.9381, GNorm = 0.1558, lr_0 = 2.6715e-04
Loss = 8.4977e-03, PNorm = 179.9472, GNorm = 0.3621, lr_0 = 2.6697e-04
Loss = 7.0075e-03, PNorm = 179.9563, GNorm = 0.0838, lr_0 = 2.6678e-04
Loss = 1.2023e-02, PNorm = 179.9623, GNorm = 0.1407, lr_0 = 2.6660e-04
Loss = 8.1764e-03, PNorm = 179.9703, GNorm = 0.2729, lr_0 = 2.6642e-04
Loss = 6.2924e-03, PNorm = 179.9791, GNorm = 0.2729, lr_0 = 2.6624e-04
Loss = 1.3148e-02, PNorm = 179.9856, GNorm = 0.1384, lr_0 = 2.6605e-04
Loss = 1.9254e-02, PNorm = 179.9941, GNorm = 0.1405, lr_0 = 2.6587e-04
Loss = 9.6814e-03, PNorm = 180.0016, GNorm = 0.2092, lr_0 = 2.6569e-04
Loss = 8.2782e-03, PNorm = 180.0082, GNorm = 0.2229, lr_0 = 2.6551e-04
Loss = 6.7874e-03, PNorm = 180.0157, GNorm = 0.3944, lr_0 = 2.6533e-04
Loss = 7.3327e-03, PNorm = 180.0222, GNorm = 0.1857, lr_0 = 2.6514e-04
Loss = 8.1564e-03, PNorm = 180.0297, GNorm = 0.2374, lr_0 = 2.6496e-04
Loss = 7.3610e-03, PNorm = 180.0368, GNorm = 0.3469, lr_0 = 2.6478e-04
Loss = 6.4340e-03, PNorm = 180.0425, GNorm = 0.1300, lr_0 = 2.6460e-04
Loss = 6.0318e-03, PNorm = 180.0490, GNorm = 0.1408, lr_0 = 2.6442e-04
Loss = 6.4135e-03, PNorm = 180.0574, GNorm = 0.1833, lr_0 = 2.6424e-04
Loss = 8.9675e-03, PNorm = 180.0654, GNorm = 0.1316, lr_0 = 2.6406e-04
Loss = 1.1946e-02, PNorm = 180.0716, GNorm = 0.2653, lr_0 = 2.6388e-04
Loss = 1.0363e-02, PNorm = 180.0780, GNorm = 0.0978, lr_0 = 2.6369e-04
Loss = 5.0015e-03, PNorm = 180.0868, GNorm = 0.0928, lr_0 = 2.6351e-04
Loss = 7.6117e-03, PNorm = 180.0930, GNorm = 0.1538, lr_0 = 2.6333e-04
Loss = 1.2475e-02, PNorm = 180.0991, GNorm = 0.1640, lr_0 = 2.6315e-04
Loss = 9.0236e-03, PNorm = 180.1070, GNorm = 0.2451, lr_0 = 2.6297e-04
Loss = 6.0867e-03, PNorm = 180.1139, GNorm = 0.1944, lr_0 = 2.6279e-04
Loss = 7.4576e-03, PNorm = 180.1210, GNorm = 0.1251, lr_0 = 2.6261e-04
Loss = 5.6889e-03, PNorm = 180.1281, GNorm = 0.1322, lr_0 = 2.6243e-04
Loss = 6.2561e-03, PNorm = 180.1349, GNorm = 0.1089, lr_0 = 2.6225e-04
Loss = 6.8954e-03, PNorm = 180.1428, GNorm = 0.1130, lr_0 = 2.6207e-04
Loss = 6.4320e-03, PNorm = 180.1507, GNorm = 0.0902, lr_0 = 2.6189e-04
Loss = 7.1207e-03, PNorm = 180.1601, GNorm = 0.4909, lr_0 = 2.6171e-04
Loss = 6.3404e-03, PNorm = 180.1674, GNorm = 0.2364, lr_0 = 2.6153e-04
Loss = 6.9395e-03, PNorm = 180.1722, GNorm = 0.1391, lr_0 = 2.6136e-04
Loss = 7.3224e-03, PNorm = 180.1787, GNorm = 0.1430, lr_0 = 2.6118e-04
Loss = 6.5062e-03, PNorm = 180.1841, GNorm = 0.1285, lr_0 = 2.6100e-04
Loss = 1.1239e-02, PNorm = 180.1917, GNorm = 0.2712, lr_0 = 2.6082e-04
Loss = 1.0785e-02, PNorm = 180.2000, GNorm = 0.1375, lr_0 = 2.6064e-04
Loss = 9.5887e-03, PNorm = 180.2055, GNorm = 0.2058, lr_0 = 2.6046e-04
Loss = 6.5326e-03, PNorm = 180.2125, GNorm = 0.1373, lr_0 = 2.6028e-04
Loss = 1.3427e-02, PNorm = 180.2195, GNorm = 0.1587, lr_0 = 2.6011e-04
Loss = 6.6674e-03, PNorm = 180.2271, GNorm = 0.1205, lr_0 = 2.5993e-04
Loss = 5.5466e-03, PNorm = 180.2347, GNorm = 0.1676, lr_0 = 2.5975e-04
Loss = 9.9921e-03, PNorm = 180.2429, GNorm = 0.1807, lr_0 = 2.5957e-04
Loss = 6.7152e-03, PNorm = 180.2495, GNorm = 0.1975, lr_0 = 2.5939e-04
Loss = 8.6325e-03, PNorm = 180.2580, GNorm = 0.3504, lr_0 = 2.5922e-04
Loss = 9.6886e-03, PNorm = 180.2666, GNorm = 0.2030, lr_0 = 2.5904e-04
Loss = 8.4106e-03, PNorm = 180.2747, GNorm = 0.2165, lr_0 = 2.5886e-04
Loss = 8.1634e-03, PNorm = 180.2827, GNorm = 0.2062, lr_0 = 2.5868e-04
Loss = 8.8012e-03, PNorm = 180.2870, GNorm = 0.1171, lr_0 = 2.5851e-04
Loss = 5.0981e-03, PNorm = 180.2940, GNorm = 0.1402, lr_0 = 2.5833e-04
Loss = 5.2599e-03, PNorm = 180.3011, GNorm = 0.1537, lr_0 = 2.5815e-04
Loss = 1.1084e-02, PNorm = 180.3083, GNorm = 0.1369, lr_0 = 2.5797e-04
Loss = 1.0163e-02, PNorm = 180.3175, GNorm = 0.5265, lr_0 = 2.5780e-04
Loss = 5.6480e-03, PNorm = 180.3266, GNorm = 0.2467, lr_0 = 2.5762e-04
Loss = 5.1092e-03, PNorm = 180.3357, GNorm = 0.1792, lr_0 = 2.5745e-04
Loss = 7.6180e-03, PNorm = 180.3443, GNorm = 0.1607, lr_0 = 2.5727e-04
Loss = 5.0784e-03, PNorm = 180.3507, GNorm = 0.1607, lr_0 = 2.5709e-04
Loss = 8.6072e-03, PNorm = 180.3581, GNorm = 0.1896, lr_0 = 2.5692e-04
Loss = 5.2752e-03, PNorm = 180.3653, GNorm = 0.0989, lr_0 = 2.5674e-04
Loss = 9.0217e-03, PNorm = 180.3719, GNorm = 0.2010, lr_0 = 2.5656e-04
Loss = 6.4354e-03, PNorm = 180.3790, GNorm = 0.1777, lr_0 = 2.5639e-04
Loss = 5.5025e-03, PNorm = 180.3874, GNorm = 0.1153, lr_0 = 2.5621e-04
Loss = 1.2539e-02, PNorm = 180.3951, GNorm = 0.2354, lr_0 = 2.5604e-04
Loss = 8.4365e-03, PNorm = 180.4039, GNorm = 0.2952, lr_0 = 2.5586e-04
Loss = 1.2774e-02, PNorm = 180.4053, GNorm = 0.2488, lr_0 = 2.5569e-04
Loss = 6.3459e-03, PNorm = 180.4152, GNorm = 0.3415, lr_0 = 2.5551e-04
Loss = 5.9036e-03, PNorm = 180.4202, GNorm = 0.3326, lr_0 = 2.5534e-04
Loss = 6.5822e-03, PNorm = 180.4269, GNorm = 0.1222, lr_0 = 2.5516e-04
Loss = 6.3563e-03, PNorm = 180.4349, GNorm = 0.1554, lr_0 = 2.5499e-04
Loss = 6.0853e-03, PNorm = 180.4426, GNorm = 0.1726, lr_0 = 2.5481e-04
Loss = 5.3242e-03, PNorm = 180.4507, GNorm = 0.1086, lr_0 = 2.5464e-04
Loss = 7.6486e-03, PNorm = 180.4567, GNorm = 0.1298, lr_0 = 2.5446e-04
Loss = 7.1430e-03, PNorm = 180.4655, GNorm = 0.2827, lr_0 = 2.5429e-04
Loss = 7.3724e-03, PNorm = 180.4741, GNorm = 0.1620, lr_0 = 2.5411e-04
Loss = 7.6252e-03, PNorm = 180.4811, GNorm = 0.1104, lr_0 = 2.5394e-04
Loss = 8.8555e-03, PNorm = 180.4878, GNorm = 0.2044, lr_0 = 2.5377e-04
Loss = 5.3551e-03, PNorm = 180.4949, GNorm = 0.1972, lr_0 = 2.5359e-04
Loss = 6.7928e-03, PNorm = 180.4998, GNorm = 0.1165, lr_0 = 2.5342e-04
Loss = 6.1308e-03, PNorm = 180.5081, GNorm = 0.4855, lr_0 = 2.5325e-04
Loss = 6.1424e-03, PNorm = 180.5166, GNorm = 0.1052, lr_0 = 2.5307e-04
Loss = 8.5285e-03, PNorm = 180.5221, GNorm = 0.1789, lr_0 = 2.5290e-04
Loss = 6.6867e-03, PNorm = 180.5293, GNorm = 0.2689, lr_0 = 2.5273e-04
Loss = 5.4589e-03, PNorm = 180.5377, GNorm = 0.1357, lr_0 = 2.5255e-04
Loss = 7.5113e-03, PNorm = 180.5476, GNorm = 0.1524, lr_0 = 2.5238e-04
Loss = 7.2610e-03, PNorm = 180.5590, GNorm = 0.1803, lr_0 = 2.5221e-04
Loss = 5.7443e-03, PNorm = 180.5673, GNorm = 0.2386, lr_0 = 2.5203e-04
Loss = 6.8862e-03, PNorm = 180.5763, GNorm = 0.2462, lr_0 = 2.5186e-04
Loss = 1.7116e-02, PNorm = 180.5843, GNorm = 0.1782, lr_0 = 2.5169e-04
Loss = 7.8689e-03, PNorm = 180.5912, GNorm = 0.2440, lr_0 = 2.5152e-04
Loss = 8.9542e-03, PNorm = 180.5974, GNorm = 0.3581, lr_0 = 2.5134e-04
Loss = 1.9430e-02, PNorm = 180.6052, GNorm = 0.5543, lr_0 = 2.5117e-04
Loss = 6.1350e-03, PNorm = 180.6141, GNorm = 0.1798, lr_0 = 2.5100e-04
Loss = 5.7988e-03, PNorm = 180.6227, GNorm = 0.3820, lr_0 = 2.5083e-04
Loss = 7.2737e-03, PNorm = 180.6316, GNorm = 0.1770, lr_0 = 2.5066e-04
Loss = 1.1905e-02, PNorm = 180.6403, GNorm = 1.6883, lr_0 = 2.5048e-04
Loss = 7.5377e-03, PNorm = 180.6497, GNorm = 0.1482, lr_0 = 2.5031e-04
Loss = 6.9941e-03, PNorm = 180.6596, GNorm = 0.1673, lr_0 = 2.5014e-04
Loss = 1.0863e-02, PNorm = 180.6670, GNorm = 0.1400, lr_0 = 2.4997e-04
Loss = 1.3850e-02, PNorm = 180.6732, GNorm = 0.1510, lr_0 = 2.4980e-04
Loss = 8.6154e-03, PNorm = 180.6775, GNorm = 0.3294, lr_0 = 2.4963e-04
Loss = 6.7844e-03, PNorm = 180.6869, GNorm = 0.1907, lr_0 = 2.4946e-04
Loss = 9.3128e-03, PNorm = 180.6978, GNorm = 0.1908, lr_0 = 2.4929e-04
Loss = 7.0192e-03, PNorm = 180.7089, GNorm = 0.1502, lr_0 = 2.4911e-04
Loss = 7.7738e-03, PNorm = 180.7185, GNorm = 0.1775, lr_0 = 2.4894e-04
Loss = 5.6302e-03, PNorm = 180.7273, GNorm = 0.3511, lr_0 = 2.4877e-04
Loss = 7.4013e-03, PNorm = 180.7353, GNorm = 0.1872, lr_0 = 2.4860e-04
Loss = 6.6809e-03, PNorm = 180.7415, GNorm = 0.3477, lr_0 = 2.4843e-04
Loss = 8.9682e-03, PNorm = 180.7494, GNorm = 0.1489, lr_0 = 2.4826e-04
Loss = 8.5220e-03, PNorm = 180.7616, GNorm = 0.5344, lr_0 = 2.4809e-04
Loss = 5.2001e-03, PNorm = 180.7709, GNorm = 0.1630, lr_0 = 2.4792e-04
Loss = 7.0687e-03, PNorm = 180.7782, GNorm = 0.1545, lr_0 = 2.4775e-04
Loss = 7.6590e-03, PNorm = 180.7853, GNorm = 0.1003, lr_0 = 2.4758e-04
Loss = 1.8833e-02, PNorm = 180.7923, GNorm = 0.1431, lr_0 = 2.4741e-04
Loss = 9.9881e-03, PNorm = 180.8007, GNorm = 0.9212, lr_0 = 2.4724e-04
Loss = 1.2842e-02, PNorm = 180.8078, GNorm = 0.2245, lr_0 = 2.4707e-04
Validation mae = 0.121276
Epoch 19
Loss = 6.9266e-03, PNorm = 180.8164, GNorm = 0.2332, lr_0 = 2.4690e-04
Loss = 7.6134e-03, PNorm = 180.8237, GNorm = 0.2802, lr_0 = 2.4674e-04
Loss = 7.8283e-03, PNorm = 180.8316, GNorm = 0.2390, lr_0 = 2.4657e-04
Loss = 6.0623e-03, PNorm = 180.8378, GNorm = 0.1805, lr_0 = 2.4640e-04
Loss = 1.0637e-02, PNorm = 180.8420, GNorm = 0.2773, lr_0 = 2.4623e-04
Loss = 5.4831e-03, PNorm = 180.8484, GNorm = 0.2675, lr_0 = 2.4606e-04
Loss = 8.6850e-03, PNorm = 180.8555, GNorm = 0.1589, lr_0 = 2.4589e-04
Loss = 5.3464e-03, PNorm = 180.8626, GNorm = 0.1403, lr_0 = 2.4572e-04
Loss = 5.4014e-03, PNorm = 180.8684, GNorm = 0.1407, lr_0 = 2.4556e-04
Loss = 1.0128e-02, PNorm = 180.8738, GNorm = 0.1472, lr_0 = 2.4539e-04
Loss = 7.3351e-03, PNorm = 180.8791, GNorm = 0.1702, lr_0 = 2.4522e-04
Loss = 6.7005e-03, PNorm = 180.8866, GNorm = 0.2413, lr_0 = 2.4505e-04
Loss = 6.0760e-03, PNorm = 180.8954, GNorm = 0.2187, lr_0 = 2.4488e-04
Loss = 5.7085e-03, PNorm = 180.9039, GNorm = 0.1571, lr_0 = 2.4472e-04
Loss = 5.3017e-03, PNorm = 180.9090, GNorm = 0.1255, lr_0 = 2.4455e-04
Loss = 4.8238e-03, PNorm = 180.9127, GNorm = 0.1492, lr_0 = 2.4438e-04
Loss = 7.6909e-03, PNorm = 180.9177, GNorm = 0.1363, lr_0 = 2.4421e-04
Loss = 6.2548e-03, PNorm = 180.9240, GNorm = 0.6000, lr_0 = 2.4405e-04
Loss = 5.7317e-03, PNorm = 180.9295, GNorm = 0.4500, lr_0 = 2.4388e-04
Loss = 6.2183e-03, PNorm = 180.9374, GNorm = 0.1997, lr_0 = 2.4371e-04
Loss = 4.7656e-03, PNorm = 180.9419, GNorm = 0.1905, lr_0 = 2.4354e-04
Loss = 7.8945e-03, PNorm = 180.9477, GNorm = 0.2219, lr_0 = 2.4338e-04
Loss = 7.1779e-03, PNorm = 180.9535, GNorm = 0.1535, lr_0 = 2.4321e-04
Loss = 5.5977e-03, PNorm = 180.9584, GNorm = 0.1891, lr_0 = 2.4304e-04
Loss = 8.4784e-03, PNorm = 180.9647, GNorm = 0.1579, lr_0 = 2.4288e-04
Loss = 8.0114e-03, PNorm = 180.9705, GNorm = 0.3244, lr_0 = 2.4271e-04
Loss = 7.3059e-03, PNorm = 180.9776, GNorm = 0.7387, lr_0 = 2.4254e-04
Loss = 1.1057e-02, PNorm = 180.9850, GNorm = 0.3098, lr_0 = 2.4238e-04
Loss = 9.1446e-03, PNorm = 180.9924, GNorm = 0.2330, lr_0 = 2.4221e-04
Loss = 6.0169e-03, PNorm = 180.9987, GNorm = 0.1723, lr_0 = 2.4205e-04
Loss = 5.5486e-03, PNorm = 181.0063, GNorm = 0.2107, lr_0 = 2.4188e-04
Loss = 7.7246e-03, PNorm = 181.0133, GNorm = 0.2076, lr_0 = 2.4171e-04
Loss = 6.4352e-03, PNorm = 181.0147, GNorm = 0.2878, lr_0 = 2.4155e-04
Loss = 6.2610e-03, PNorm = 181.0209, GNorm = 0.2280, lr_0 = 2.4138e-04
Loss = 5.3452e-03, PNorm = 181.0280, GNorm = 0.2679, lr_0 = 2.4122e-04
Loss = 5.9072e-03, PNorm = 181.0352, GNorm = 0.2249, lr_0 = 2.4105e-04
Loss = 7.3213e-03, PNorm = 181.0402, GNorm = 0.2476, lr_0 = 2.4089e-04
Loss = 5.5464e-03, PNorm = 181.0465, GNorm = 0.1330, lr_0 = 2.4072e-04
Loss = 5.6870e-03, PNorm = 181.0528, GNorm = 0.1586, lr_0 = 2.4056e-04
Loss = 8.4759e-03, PNorm = 181.0595, GNorm = 0.1124, lr_0 = 2.4039e-04
Loss = 8.8303e-03, PNorm = 181.0623, GNorm = 0.3156, lr_0 = 2.4023e-04
Loss = 8.1274e-03, PNorm = 181.0677, GNorm = 0.1512, lr_0 = 2.4006e-04
Loss = 8.0300e-03, PNorm = 181.0745, GNorm = 0.4321, lr_0 = 2.3990e-04
Loss = 6.4310e-03, PNorm = 181.0815, GNorm = 0.1777, lr_0 = 2.3974e-04
Loss = 7.3636e-03, PNorm = 181.0890, GNorm = 0.3026, lr_0 = 2.3957e-04
Loss = 5.6607e-03, PNorm = 181.0967, GNorm = 0.2980, lr_0 = 2.3941e-04
Loss = 5.4037e-03, PNorm = 181.1038, GNorm = 0.1649, lr_0 = 2.3924e-04
Loss = 9.1643e-03, PNorm = 181.1103, GNorm = 0.0777, lr_0 = 2.3908e-04
Loss = 6.6922e-03, PNorm = 181.1185, GNorm = 0.2233, lr_0 = 2.3892e-04
Loss = 4.0179e-03, PNorm = 181.1243, GNorm = 0.1019, lr_0 = 2.3875e-04
Loss = 4.7567e-03, PNorm = 181.1287, GNorm = 0.1125, lr_0 = 2.3859e-04
Loss = 6.9920e-03, PNorm = 181.1359, GNorm = 0.0797, lr_0 = 2.3842e-04
Loss = 7.6610e-03, PNorm = 181.1419, GNorm = 0.1629, lr_0 = 2.3826e-04
Loss = 5.3017e-03, PNorm = 181.1493, GNorm = 0.2757, lr_0 = 2.3810e-04
Loss = 8.2774e-03, PNorm = 181.1552, GNorm = 0.8033, lr_0 = 2.3794e-04
Loss = 5.3748e-03, PNorm = 181.1621, GNorm = 0.2316, lr_0 = 2.3777e-04
Loss = 4.9283e-03, PNorm = 181.1662, GNorm = 0.1517, lr_0 = 2.3761e-04
Loss = 5.6291e-03, PNorm = 181.1713, GNorm = 0.0676, lr_0 = 2.3745e-04
Loss = 3.9537e-03, PNorm = 181.1761, GNorm = 0.2163, lr_0 = 2.3728e-04
Loss = 5.6198e-03, PNorm = 181.1834, GNorm = 0.1820, lr_0 = 2.3712e-04
Loss = 4.5462e-03, PNorm = 181.1907, GNorm = 0.1596, lr_0 = 2.3696e-04
Loss = 1.0685e-02, PNorm = 181.1969, GNorm = 0.1355, lr_0 = 2.3680e-04
Loss = 8.3143e-03, PNorm = 181.2041, GNorm = 0.2566, lr_0 = 2.3663e-04
Loss = 5.3132e-03, PNorm = 181.2097, GNorm = 0.1257, lr_0 = 2.3647e-04
Loss = 5.5821e-03, PNorm = 181.2153, GNorm = 0.0791, lr_0 = 2.3631e-04
Loss = 6.2468e-03, PNorm = 181.2194, GNorm = 0.8692, lr_0 = 2.3615e-04
Loss = 8.1141e-03, PNorm = 181.2266, GNorm = 0.1324, lr_0 = 2.3599e-04
Loss = 1.9473e-02, PNorm = 181.2370, GNorm = 0.0797, lr_0 = 2.3582e-04
Loss = 1.0543e-02, PNorm = 181.2415, GNorm = 0.2280, lr_0 = 2.3566e-04
Loss = 7.4479e-03, PNorm = 181.2483, GNorm = 0.0925, lr_0 = 2.3550e-04
Loss = 7.7307e-03, PNorm = 181.2547, GNorm = 0.0918, lr_0 = 2.3534e-04
Loss = 8.0497e-03, PNorm = 181.2594, GNorm = 0.2197, lr_0 = 2.3518e-04
Loss = 8.4586e-03, PNorm = 181.2645, GNorm = 0.2915, lr_0 = 2.3502e-04
Loss = 5.2247e-03, PNorm = 181.2732, GNorm = 0.0859, lr_0 = 2.3486e-04
Loss = 5.7839e-03, PNorm = 181.2811, GNorm = 0.2185, lr_0 = 2.3470e-04
Loss = 5.3948e-03, PNorm = 181.2861, GNorm = 0.1099, lr_0 = 2.3454e-04
Loss = 4.4923e-03, PNorm = 181.2934, GNorm = 0.1829, lr_0 = 2.3437e-04
Loss = 9.1387e-03, PNorm = 181.2997, GNorm = 0.2219, lr_0 = 2.3421e-04
Loss = 6.8929e-03, PNorm = 181.3038, GNorm = 0.4009, lr_0 = 2.3405e-04
Loss = 5.2874e-03, PNorm = 181.3101, GNorm = 0.1527, lr_0 = 2.3389e-04
Loss = 1.1152e-02, PNorm = 181.3193, GNorm = 0.1050, lr_0 = 2.3373e-04
Loss = 6.8096e-03, PNorm = 181.3262, GNorm = 0.2283, lr_0 = 2.3357e-04
Loss = 5.4171e-03, PNorm = 181.3340, GNorm = 0.2270, lr_0 = 2.3341e-04
Loss = 4.0815e-03, PNorm = 181.3408, GNorm = 0.1392, lr_0 = 2.3325e-04
Loss = 5.5030e-03, PNorm = 181.3454, GNorm = 0.3319, lr_0 = 2.3309e-04
Loss = 1.0733e-02, PNorm = 181.3508, GNorm = 0.0934, lr_0 = 2.3293e-04
Loss = 7.8883e-03, PNorm = 181.3556, GNorm = 0.1283, lr_0 = 2.3277e-04
Loss = 9.2219e-03, PNorm = 181.3605, GNorm = 0.2055, lr_0 = 2.3261e-04
Loss = 5.7391e-03, PNorm = 181.3663, GNorm = 0.1238, lr_0 = 2.3246e-04
Loss = 5.1182e-03, PNorm = 181.3722, GNorm = 0.1852, lr_0 = 2.3230e-04
Loss = 9.8364e-03, PNorm = 181.3788, GNorm = 0.0966, lr_0 = 2.3214e-04
Loss = 1.1026e-02, PNorm = 181.3874, GNorm = 0.5907, lr_0 = 2.3198e-04
Loss = 1.1961e-02, PNorm = 181.3941, GNorm = 0.1674, lr_0 = 2.3182e-04
Loss = 7.7542e-03, PNorm = 181.4021, GNorm = 0.2003, lr_0 = 2.3166e-04
Loss = 5.1884e-03, PNorm = 181.4094, GNorm = 0.2462, lr_0 = 2.3150e-04
Loss = 5.2255e-03, PNorm = 181.4162, GNorm = 0.1472, lr_0 = 2.3134e-04
Loss = 6.0120e-03, PNorm = 181.4231, GNorm = 0.1403, lr_0 = 2.3118e-04
Loss = 5.8187e-03, PNorm = 181.4272, GNorm = 0.0992, lr_0 = 2.3103e-04
Loss = 7.6475e-03, PNorm = 181.4324, GNorm = 0.1333, lr_0 = 2.3087e-04
Loss = 1.2643e-02, PNorm = 181.4375, GNorm = 0.4014, lr_0 = 2.3071e-04
Loss = 6.6925e-03, PNorm = 181.4424, GNorm = 0.1400, lr_0 = 2.3055e-04
Loss = 1.0187e-02, PNorm = 181.4472, GNorm = 0.1964, lr_0 = 2.3039e-04
Loss = 4.7977e-03, PNorm = 181.4524, GNorm = 0.1314, lr_0 = 2.3024e-04
Loss = 7.2633e-03, PNorm = 181.4556, GNorm = 0.1552, lr_0 = 2.3008e-04
Loss = 7.9951e-03, PNorm = 181.4597, GNorm = 0.2421, lr_0 = 2.2992e-04
Loss = 8.3971e-03, PNorm = 181.4673, GNorm = 0.3612, lr_0 = 2.2976e-04
Loss = 5.5292e-03, PNorm = 181.4721, GNorm = 0.3794, lr_0 = 2.2961e-04
Loss = 7.9988e-03, PNorm = 181.4812, GNorm = 0.1328, lr_0 = 2.2945e-04
Loss = 8.1200e-03, PNorm = 181.4877, GNorm = 0.2768, lr_0 = 2.2929e-04
Loss = 3.9724e-03, PNorm = 181.4933, GNorm = 0.1772, lr_0 = 2.2913e-04
Loss = 4.4341e-03, PNorm = 181.5007, GNorm = 0.3473, lr_0 = 2.2898e-04
Loss = 1.3150e-02, PNorm = 181.5072, GNorm = 0.3081, lr_0 = 2.2882e-04
Loss = 8.3556e-03, PNorm = 181.5152, GNorm = 0.4489, lr_0 = 2.2866e-04
Loss = 6.9046e-03, PNorm = 181.5244, GNorm = 0.3028, lr_0 = 2.2851e-04
Loss = 6.2075e-03, PNorm = 181.5315, GNorm = 0.3275, lr_0 = 2.2835e-04
Loss = 5.9546e-03, PNorm = 181.5398, GNorm = 0.1582, lr_0 = 2.2819e-04
Loss = 1.1482e-02, PNorm = 181.5467, GNorm = 0.1672, lr_0 = 2.2804e-04
Loss = 1.1844e-02, PNorm = 181.5557, GNorm = 0.6941, lr_0 = 2.2788e-04
Loss = 5.4709e-03, PNorm = 181.5642, GNorm = 0.2369, lr_0 = 2.2773e-04
Loss = 1.4408e-02, PNorm = 181.5689, GNorm = 0.2697, lr_0 = 2.2757e-04
Validation mae = 0.121209
Epoch 20
Loss = 7.9978e-03, PNorm = 181.5771, GNorm = 0.1740, lr_0 = 2.2741e-04
Loss = 5.7918e-03, PNorm = 181.5834, GNorm = 0.1942, lr_0 = 2.2726e-04
Loss = 8.8690e-03, PNorm = 181.5870, GNorm = 0.1660, lr_0 = 2.2710e-04
Loss = 6.3492e-03, PNorm = 181.5921, GNorm = 0.2535, lr_0 = 2.2695e-04
Loss = 7.6694e-03, PNorm = 181.5962, GNorm = 0.1032, lr_0 = 2.2679e-04
Loss = 5.5035e-03, PNorm = 181.6031, GNorm = 0.2121, lr_0 = 2.2664e-04
Loss = 5.2668e-03, PNorm = 181.6076, GNorm = 0.3214, lr_0 = 2.2648e-04
Loss = 7.1882e-03, PNorm = 181.6128, GNorm = 0.1051, lr_0 = 2.2632e-04
Loss = 9.3578e-03, PNorm = 181.6161, GNorm = 0.3667, lr_0 = 2.2617e-04
Loss = 7.1772e-03, PNorm = 181.6228, GNorm = 0.1602, lr_0 = 2.2601e-04
Loss = 9.0709e-03, PNorm = 181.6300, GNorm = 0.1961, lr_0 = 2.2586e-04
Loss = 5.3337e-03, PNorm = 181.6349, GNorm = 0.0951, lr_0 = 2.2571e-04
Loss = 5.4506e-03, PNorm = 181.6404, GNorm = 0.1405, lr_0 = 2.2555e-04
Loss = 4.8194e-03, PNorm = 181.6465, GNorm = 0.0719, lr_0 = 2.2540e-04
Loss = 8.0973e-03, PNorm = 181.6539, GNorm = 0.0640, lr_0 = 2.2524e-04
Loss = 4.8636e-03, PNorm = 181.6587, GNorm = 0.1256, lr_0 = 2.2509e-04
Loss = 1.0282e-02, PNorm = 181.6639, GNorm = 0.4682, lr_0 = 2.2493e-04
Loss = 4.3522e-03, PNorm = 181.6687, GNorm = 0.1055, lr_0 = 2.2478e-04
Loss = 4.4121e-03, PNorm = 181.6734, GNorm = 0.1010, lr_0 = 2.2463e-04
Loss = 3.9766e-03, PNorm = 181.6779, GNorm = 0.1217, lr_0 = 2.2447e-04
Loss = 4.7585e-03, PNorm = 181.6824, GNorm = 0.1056, lr_0 = 2.2432e-04
Loss = 8.1409e-03, PNorm = 181.6854, GNorm = 0.1311, lr_0 = 2.2416e-04
Loss = 6.6827e-03, PNorm = 181.6915, GNorm = 0.1350, lr_0 = 2.2401e-04
Loss = 3.6753e-03, PNorm = 181.6956, GNorm = 0.2015, lr_0 = 2.2386e-04
Loss = 5.1248e-03, PNorm = 181.6993, GNorm = 0.3579, lr_0 = 2.2370e-04
Loss = 6.3233e-03, PNorm = 181.7041, GNorm = 0.2764, lr_0 = 2.2355e-04
Loss = 4.5264e-03, PNorm = 181.7090, GNorm = 0.1010, lr_0 = 2.2340e-04
Loss = 4.5925e-03, PNorm = 181.7151, GNorm = 0.1903, lr_0 = 2.2324e-04
Loss = 5.6226e-03, PNorm = 181.7240, GNorm = 0.1159, lr_0 = 2.2309e-04
Loss = 5.2882e-03, PNorm = 181.7304, GNorm = 0.1616, lr_0 = 2.2294e-04
Loss = 4.6662e-03, PNorm = 181.7364, GNorm = 0.0897, lr_0 = 2.2279e-04
Loss = 5.0987e-03, PNorm = 181.7407, GNorm = 0.1494, lr_0 = 2.2263e-04
Loss = 6.1957e-03, PNorm = 181.7467, GNorm = 1.0312, lr_0 = 2.2248e-04
Loss = 3.9130e-03, PNorm = 181.7550, GNorm = 0.1143, lr_0 = 2.2233e-04
Loss = 5.9552e-03, PNorm = 181.7604, GNorm = 0.1850, lr_0 = 2.2218e-04
Loss = 5.7423e-03, PNorm = 181.7636, GNorm = 0.2005, lr_0 = 2.2202e-04
Loss = 5.3485e-03, PNorm = 181.7675, GNorm = 0.0984, lr_0 = 2.2187e-04
Loss = 7.1506e-03, PNorm = 181.7691, GNorm = 0.4425, lr_0 = 2.2172e-04
Loss = 5.3590e-03, PNorm = 181.7732, GNorm = 0.1015, lr_0 = 2.2157e-04
Loss = 6.6239e-03, PNorm = 181.7776, GNorm = 0.2603, lr_0 = 2.2142e-04
Loss = 7.6625e-03, PNorm = 181.7815, GNorm = 0.1288, lr_0 = 2.2126e-04
Loss = 8.9008e-03, PNorm = 181.7884, GNorm = 0.0837, lr_0 = 2.2111e-04
Loss = 4.1580e-03, PNorm = 181.7954, GNorm = 0.1041, lr_0 = 2.2096e-04
Loss = 7.2436e-03, PNorm = 181.8041, GNorm = 0.0776, lr_0 = 2.2081e-04
Loss = 7.0406e-03, PNorm = 181.8102, GNorm = 0.2083, lr_0 = 2.2066e-04
Loss = 6.4133e-03, PNorm = 181.8141, GNorm = 0.4514, lr_0 = 2.2051e-04
Loss = 6.7771e-03, PNorm = 181.8195, GNorm = 0.1312, lr_0 = 2.2036e-04
Loss = 6.7877e-03, PNorm = 181.8227, GNorm = 0.1313, lr_0 = 2.2021e-04
Loss = 4.5194e-03, PNorm = 181.8269, GNorm = 0.1042, lr_0 = 2.2005e-04
Loss = 7.9143e-03, PNorm = 181.8318, GNorm = 0.2231, lr_0 = 2.1990e-04
Loss = 3.4463e-03, PNorm = 181.8370, GNorm = 0.1324, lr_0 = 2.1975e-04
Loss = 6.1896e-03, PNorm = 181.8420, GNorm = 0.1671, lr_0 = 2.1960e-04
Loss = 5.1353e-03, PNorm = 181.8462, GNorm = 0.2199, lr_0 = 2.1945e-04
Loss = 1.2261e-02, PNorm = 181.8510, GNorm = 0.3330, lr_0 = 2.1930e-04
Loss = 5.0724e-03, PNorm = 181.8564, GNorm = 0.1521, lr_0 = 2.1915e-04
Loss = 4.7579e-03, PNorm = 181.8627, GNorm = 0.0743, lr_0 = 2.1900e-04
Loss = 4.7024e-03, PNorm = 181.8681, GNorm = 0.2450, lr_0 = 2.1885e-04
Loss = 6.2290e-03, PNorm = 181.8750, GNorm = 0.1809, lr_0 = 2.1870e-04
Loss = 4.1949e-03, PNorm = 181.8792, GNorm = 0.1010, lr_0 = 2.1855e-04
Loss = 4.3332e-03, PNorm = 181.8857, GNorm = 0.2020, lr_0 = 2.1840e-04
Loss = 6.0013e-03, PNorm = 181.8924, GNorm = 0.1850, lr_0 = 2.1825e-04
Loss = 4.7543e-03, PNorm = 181.8969, GNorm = 0.0837, lr_0 = 2.1810e-04
Loss = 6.2723e-03, PNorm = 181.9052, GNorm = 0.0737, lr_0 = 2.1795e-04
Loss = 7.5974e-03, PNorm = 181.9108, GNorm = 0.2785, lr_0 = 2.1780e-04
Loss = 5.0711e-03, PNorm = 181.9151, GNorm = 0.2254, lr_0 = 2.1765e-04
Loss = 3.6581e-03, PNorm = 181.9194, GNorm = 0.1410, lr_0 = 2.1751e-04
Loss = 4.3285e-03, PNorm = 181.9242, GNorm = 0.2346, lr_0 = 2.1736e-04
Loss = 6.1811e-03, PNorm = 181.9282, GNorm = 0.1125, lr_0 = 2.1721e-04
Loss = 5.5518e-03, PNorm = 181.9326, GNorm = 0.1253, lr_0 = 2.1706e-04
Loss = 4.1307e-03, PNorm = 181.9367, GNorm = 0.1490, lr_0 = 2.1691e-04
Loss = 5.8370e-03, PNorm = 181.9415, GNorm = 0.1601, lr_0 = 2.1676e-04
Loss = 5.0860e-03, PNorm = 181.9469, GNorm = 0.1954, lr_0 = 2.1661e-04
Loss = 4.8812e-03, PNorm = 181.9535, GNorm = 0.1944, lr_0 = 2.1646e-04
Loss = 5.7602e-03, PNorm = 181.9599, GNorm = 0.2036, lr_0 = 2.1632e-04
Loss = 9.2062e-03, PNorm = 181.9661, GNorm = 0.1231, lr_0 = 2.1617e-04
Loss = 5.1694e-03, PNorm = 181.9719, GNorm = 0.1016, lr_0 = 2.1602e-04
Loss = 5.2958e-03, PNorm = 181.9784, GNorm = 0.2060, lr_0 = 2.1587e-04
Loss = 5.5057e-03, PNorm = 181.9837, GNorm = 0.1726, lr_0 = 2.1572e-04
Loss = 6.2342e-03, PNorm = 181.9896, GNorm = 0.1422, lr_0 = 2.1558e-04
Loss = 8.0660e-03, PNorm = 181.9944, GNorm = 0.1628, lr_0 = 2.1543e-04
Loss = 5.1445e-03, PNorm = 181.9982, GNorm = 0.1295, lr_0 = 2.1528e-04
Loss = 5.0906e-03, PNorm = 182.0030, GNorm = 0.1773, lr_0 = 2.1513e-04
Loss = 5.4069e-03, PNorm = 182.0071, GNorm = 0.1402, lr_0 = 2.1499e-04
Loss = 5.5689e-03, PNorm = 182.0113, GNorm = 0.1331, lr_0 = 2.1484e-04
Loss = 1.3026e-02, PNorm = 182.0175, GNorm = 0.1128, lr_0 = 2.1469e-04
Loss = 4.5299e-03, PNorm = 182.0264, GNorm = 0.1423, lr_0 = 2.1454e-04
Loss = 3.7564e-03, PNorm = 182.0346, GNorm = 0.1580, lr_0 = 2.1440e-04
Loss = 4.0871e-03, PNorm = 182.0403, GNorm = 0.1259, lr_0 = 2.1425e-04
Loss = 7.1780e-03, PNorm = 182.0456, GNorm = 0.2209, lr_0 = 2.1410e-04
Loss = 3.5294e-03, PNorm = 182.0507, GNorm = 0.0894, lr_0 = 2.1396e-04
Loss = 5.2301e-03, PNorm = 182.0539, GNorm = 0.3397, lr_0 = 2.1381e-04
Loss = 2.0840e-02, PNorm = 182.0598, GNorm = 2.8161, lr_0 = 2.1366e-04
Loss = 7.5995e-03, PNorm = 182.0682, GNorm = 0.3042, lr_0 = 2.1352e-04
Loss = 1.5887e-02, PNorm = 182.0705, GNorm = 0.7654, lr_0 = 2.1337e-04
Loss = 4.5101e-03, PNorm = 182.0752, GNorm = 0.2420, lr_0 = 2.1323e-04
Loss = 1.0398e-02, PNorm = 182.0785, GNorm = 0.0797, lr_0 = 2.1308e-04
Loss = 7.3444e-03, PNorm = 182.0836, GNorm = 0.1075, lr_0 = 2.1293e-04
Loss = 1.3691e-02, PNorm = 182.0858, GNorm = 0.2109, lr_0 = 2.1279e-04
Loss = 7.8731e-03, PNorm = 182.0925, GNorm = 0.1972, lr_0 = 2.1264e-04
Loss = 5.9797e-03, PNorm = 182.0991, GNorm = 0.1093, lr_0 = 2.1250e-04
Loss = 6.6766e-03, PNorm = 182.1062, GNorm = 0.1962, lr_0 = 2.1235e-04
Loss = 6.2498e-03, PNorm = 182.1136, GNorm = 0.0871, lr_0 = 2.1221e-04
Loss = 5.6897e-03, PNorm = 182.1194, GNorm = 0.1693, lr_0 = 2.1206e-04
Loss = 8.8896e-03, PNorm = 182.1261, GNorm = 0.1309, lr_0 = 2.1191e-04
Loss = 4.7341e-03, PNorm = 182.1332, GNorm = 0.1879, lr_0 = 2.1177e-04
Loss = 1.4105e-02, PNorm = 182.1385, GNorm = 0.1192, lr_0 = 2.1162e-04
Loss = 1.0047e-02, PNorm = 182.1439, GNorm = 0.1651, lr_0 = 2.1148e-04
Loss = 5.4272e-03, PNorm = 182.1509, GNorm = 0.4381, lr_0 = 2.1133e-04
Loss = 7.8093e-03, PNorm = 182.1570, GNorm = 0.1927, lr_0 = 2.1119e-04
Loss = 1.0266e-02, PNorm = 182.1594, GNorm = 0.2282, lr_0 = 2.1104e-04
Loss = 4.9439e-03, PNorm = 182.1632, GNorm = 0.0933, lr_0 = 2.1090e-04
Loss = 4.7683e-03, PNorm = 182.1690, GNorm = 0.2139, lr_0 = 2.1076e-04
Loss = 5.4601e-03, PNorm = 182.1747, GNorm = 0.1154, lr_0 = 2.1061e-04
Loss = 4.1775e-03, PNorm = 182.1829, GNorm = 0.1262, lr_0 = 2.1047e-04
Loss = 4.8497e-03, PNorm = 182.1899, GNorm = 0.1858, lr_0 = 2.1032e-04
Loss = 6.5207e-03, PNorm = 182.1971, GNorm = 0.1293, lr_0 = 2.1018e-04
Loss = 1.0806e-02, PNorm = 182.2047, GNorm = 0.2032, lr_0 = 2.1003e-04
Loss = 4.3662e-03, PNorm = 182.2108, GNorm = 0.1337, lr_0 = 2.0989e-04
Loss = 4.9964e-03, PNorm = 182.2166, GNorm = 0.2225, lr_0 = 2.0975e-04
Loss = 4.0208e-03, PNorm = 182.2209, GNorm = 0.1930, lr_0 = 2.0960e-04
Validation mae = 0.121404
Epoch 21
Loss = 4.2744e-03, PNorm = 182.2258, GNorm = 0.2620, lr_0 = 2.0946e-04
Loss = 4.3900e-03, PNorm = 182.2316, GNorm = 0.3156, lr_0 = 2.0932e-04
Loss = 3.3796e-03, PNorm = 182.2370, GNorm = 0.0587, lr_0 = 2.0917e-04
Loss = 5.2563e-03, PNorm = 182.2424, GNorm = 0.1291, lr_0 = 2.0903e-04
Loss = 4.1993e-03, PNorm = 182.2467, GNorm = 0.2252, lr_0 = 2.0889e-04
Loss = 3.8860e-03, PNorm = 182.2499, GNorm = 0.1687, lr_0 = 2.0874e-04
Loss = 7.2711e-03, PNorm = 182.2537, GNorm = 0.1337, lr_0 = 2.0860e-04
Loss = 7.4734e-03, PNorm = 182.2567, GNorm = 0.1812, lr_0 = 2.0846e-04
Loss = 4.5514e-03, PNorm = 182.2612, GNorm = 0.1821, lr_0 = 2.0831e-04
Loss = 3.9038e-03, PNorm = 182.2658, GNorm = 0.0854, lr_0 = 2.0817e-04
Loss = 7.0478e-03, PNorm = 182.2687, GNorm = 0.1536, lr_0 = 2.0803e-04
Loss = 6.5280e-03, PNorm = 182.2712, GNorm = 0.0935, lr_0 = 2.0789e-04
Loss = 6.7949e-03, PNorm = 182.2760, GNorm = 0.2126, lr_0 = 2.0774e-04
Loss = 5.7561e-03, PNorm = 182.2803, GNorm = 0.0949, lr_0 = 2.0760e-04
Loss = 4.3347e-03, PNorm = 182.2851, GNorm = 0.1590, lr_0 = 2.0746e-04
Loss = 4.1860e-03, PNorm = 182.2891, GNorm = 0.1253, lr_0 = 2.0732e-04
Loss = 5.1020e-03, PNorm = 182.2946, GNorm = 0.0916, lr_0 = 2.0718e-04
Loss = 7.9788e-03, PNorm = 182.3000, GNorm = 0.1483, lr_0 = 2.0703e-04
Loss = 6.0131e-03, PNorm = 182.3051, GNorm = 0.1323, lr_0 = 2.0689e-04
Loss = 5.3458e-03, PNorm = 182.3105, GNorm = 0.1373, lr_0 = 2.0675e-04
Loss = 8.0868e-03, PNorm = 182.3156, GNorm = 0.1123, lr_0 = 2.0661e-04
Loss = 3.5989e-03, PNorm = 182.3203, GNorm = 0.1011, lr_0 = 2.0647e-04
Loss = 6.0926e-03, PNorm = 182.3250, GNorm = 0.2257, lr_0 = 2.0633e-04
Loss = 5.4358e-03, PNorm = 182.3316, GNorm = 0.1793, lr_0 = 2.0618e-04
Loss = 4.3740e-03, PNorm = 182.3377, GNorm = 0.0772, lr_0 = 2.0604e-04
Loss = 4.4571e-03, PNorm = 182.3417, GNorm = 0.1118, lr_0 = 2.0590e-04
Loss = 4.7789e-03, PNorm = 182.3460, GNorm = 0.1396, lr_0 = 2.0576e-04
Loss = 4.7512e-03, PNorm = 182.3508, GNorm = 0.2064, lr_0 = 2.0562e-04
Loss = 3.2734e-03, PNorm = 182.3540, GNorm = 0.0743, lr_0 = 2.0548e-04
Loss = 6.7788e-03, PNorm = 182.3578, GNorm = 0.2650, lr_0 = 2.0534e-04
Loss = 4.9651e-03, PNorm = 182.3623, GNorm = 0.1764, lr_0 = 2.0520e-04
Loss = 6.7705e-03, PNorm = 182.3659, GNorm = 0.1018, lr_0 = 2.0506e-04
Loss = 7.4520e-03, PNorm = 182.3703, GNorm = 0.2041, lr_0 = 2.0492e-04
Loss = 3.1486e-03, PNorm = 182.3754, GNorm = 0.1784, lr_0 = 2.0478e-04
Loss = 4.7243e-03, PNorm = 182.3811, GNorm = 0.1974, lr_0 = 2.0464e-04
Loss = 8.7955e-03, PNorm = 182.3863, GNorm = 0.3094, lr_0 = 2.0450e-04
Loss = 4.3473e-03, PNorm = 182.3917, GNorm = 0.1450, lr_0 = 2.0436e-04
Loss = 4.2703e-03, PNorm = 182.3976, GNorm = 0.1282, lr_0 = 2.0422e-04
Loss = 3.3300e-03, PNorm = 182.4033, GNorm = 0.1226, lr_0 = 2.0408e-04
Loss = 3.4507e-03, PNorm = 182.4087, GNorm = 0.0910, lr_0 = 2.0394e-04
Loss = 4.4874e-03, PNorm = 182.4136, GNorm = 0.1007, lr_0 = 2.0380e-04
Loss = 4.5070e-03, PNorm = 182.4188, GNorm = 0.4252, lr_0 = 2.0366e-04
Loss = 4.0776e-03, PNorm = 182.4233, GNorm = 0.0615, lr_0 = 2.0352e-04
Loss = 4.5308e-03, PNorm = 182.4264, GNorm = 0.1129, lr_0 = 2.0338e-04
Loss = 6.8939e-03, PNorm = 182.4307, GNorm = 0.0771, lr_0 = 2.0324e-04
Loss = 5.2292e-03, PNorm = 182.4346, GNorm = 0.1669, lr_0 = 2.0310e-04
Loss = 5.1621e-03, PNorm = 182.4365, GNorm = 0.1055, lr_0 = 2.0296e-04
Loss = 5.6737e-03, PNorm = 182.4409, GNorm = 0.1072, lr_0 = 2.0282e-04
Loss = 4.6182e-03, PNorm = 182.4445, GNorm = 0.0979, lr_0 = 2.0268e-04
Loss = 5.6903e-03, PNorm = 182.4489, GNorm = 0.1840, lr_0 = 2.0254e-04
Loss = 4.6808e-03, PNorm = 182.4529, GNorm = 0.1638, lr_0 = 2.0240e-04
Loss = 6.4728e-03, PNorm = 182.4562, GNorm = 0.1618, lr_0 = 2.0227e-04
Loss = 4.2550e-03, PNorm = 182.4589, GNorm = 0.1055, lr_0 = 2.0213e-04
Loss = 1.1391e-02, PNorm = 182.4639, GNorm = 0.1139, lr_0 = 2.0199e-04
Loss = 4.5110e-03, PNorm = 182.4696, GNorm = 0.1595, lr_0 = 2.0185e-04
Loss = 8.8530e-03, PNorm = 182.4733, GNorm = 0.3961, lr_0 = 2.0171e-04
Loss = 6.0883e-03, PNorm = 182.4775, GNorm = 0.1536, lr_0 = 2.0157e-04
Loss = 1.0221e-02, PNorm = 182.4821, GNorm = 0.1322, lr_0 = 2.0144e-04
Loss = 5.0401e-03, PNorm = 182.4848, GNorm = 0.2675, lr_0 = 2.0130e-04
Loss = 6.8234e-03, PNorm = 182.4871, GNorm = 0.1280, lr_0 = 2.0116e-04
Loss = 4.4144e-03, PNorm = 182.4900, GNorm = 0.2155, lr_0 = 2.0102e-04
Loss = 4.7365e-03, PNorm = 182.4955, GNorm = 0.1123, lr_0 = 2.0088e-04
Loss = 5.0925e-03, PNorm = 182.5012, GNorm = 0.1446, lr_0 = 2.0075e-04
Loss = 4.2690e-03, PNorm = 182.5064, GNorm = 0.1388, lr_0 = 2.0061e-04
Loss = 7.6940e-03, PNorm = 182.5099, GNorm = 0.2166, lr_0 = 2.0047e-04
Loss = 6.2794e-03, PNorm = 182.5158, GNorm = 0.2795, lr_0 = 2.0033e-04
Loss = 4.0899e-03, PNorm = 182.5199, GNorm = 0.2326, lr_0 = 2.0020e-04
Loss = 5.2712e-03, PNorm = 182.5265, GNorm = 0.0811, lr_0 = 2.0006e-04
Loss = 5.2275e-03, PNorm = 182.5320, GNorm = 0.1598, lr_0 = 1.9992e-04
Loss = 3.7479e-03, PNorm = 182.5378, GNorm = 0.1175, lr_0 = 1.9979e-04
Loss = 9.6083e-03, PNorm = 182.5429, GNorm = 0.1682, lr_0 = 1.9965e-04
Loss = 6.0635e-03, PNorm = 182.5485, GNorm = 0.4886, lr_0 = 1.9951e-04
Loss = 4.9979e-03, PNorm = 182.5534, GNorm = 0.1653, lr_0 = 1.9938e-04
Loss = 6.5048e-03, PNorm = 182.5569, GNorm = 0.1696, lr_0 = 1.9924e-04
Loss = 4.4168e-03, PNorm = 182.5598, GNorm = 0.1338, lr_0 = 1.9910e-04
Loss = 5.7129e-03, PNorm = 182.5614, GNorm = 0.0666, lr_0 = 1.9897e-04
Loss = 5.7325e-03, PNorm = 182.5660, GNorm = 0.1289, lr_0 = 1.9883e-04
Loss = 7.5891e-03, PNorm = 182.5714, GNorm = 0.3846, lr_0 = 1.9869e-04
Loss = 9.7169e-03, PNorm = 182.5758, GNorm = 0.2566, lr_0 = 1.9856e-04
Loss = 4.4465e-03, PNorm = 182.5815, GNorm = 0.1520, lr_0 = 1.9842e-04
Loss = 1.3814e-02, PNorm = 182.5857, GNorm = 0.4906, lr_0 = 1.9829e-04
Loss = 9.9012e-03, PNorm = 182.5934, GNorm = 1.0115, lr_0 = 1.9815e-04
Loss = 3.7650e-03, PNorm = 182.6000, GNorm = 0.1871, lr_0 = 1.9801e-04
Loss = 4.5746e-03, PNorm = 182.6050, GNorm = 0.1619, lr_0 = 1.9788e-04
Loss = 4.3640e-03, PNorm = 182.6072, GNorm = 0.2395, lr_0 = 1.9774e-04
Loss = 4.7021e-03, PNorm = 182.6100, GNorm = 0.2091, lr_0 = 1.9761e-04
Loss = 4.3082e-03, PNorm = 182.6148, GNorm = 0.1234, lr_0 = 1.9747e-04
Loss = 5.4542e-03, PNorm = 182.6201, GNorm = 0.0816, lr_0 = 1.9734e-04
Loss = 4.3676e-03, PNorm = 182.6238, GNorm = 0.1479, lr_0 = 1.9720e-04
Loss = 6.4586e-03, PNorm = 182.6297, GNorm = 0.1022, lr_0 = 1.9707e-04
Loss = 4.2171e-03, PNorm = 182.6346, GNorm = 0.3019, lr_0 = 1.9693e-04
Loss = 3.1580e-03, PNorm = 182.6407, GNorm = 0.0842, lr_0 = 1.9680e-04
Loss = 1.0741e-02, PNorm = 182.6456, GNorm = 0.2560, lr_0 = 1.9666e-04
Loss = 7.2551e-03, PNorm = 182.6482, GNorm = 0.0687, lr_0 = 1.9653e-04
Loss = 4.1348e-03, PNorm = 182.6527, GNorm = 0.2336, lr_0 = 1.9639e-04
Loss = 5.3775e-03, PNorm = 182.6573, GNorm = 0.1575, lr_0 = 1.9626e-04
Loss = 4.6854e-03, PNorm = 182.6622, GNorm = 0.1980, lr_0 = 1.9612e-04
Loss = 8.1025e-03, PNorm = 182.6683, GNorm = 0.3824, lr_0 = 1.9599e-04
Loss = 2.8143e-03, PNorm = 182.6729, GNorm = 0.1284, lr_0 = 1.9585e-04
Loss = 4.5736e-03, PNorm = 182.6806, GNorm = 0.1106, lr_0 = 1.9572e-04
Loss = 5.6310e-03, PNorm = 182.6868, GNorm = 0.2228, lr_0 = 1.9559e-04
Loss = 4.9169e-03, PNorm = 182.6914, GNorm = 0.1383, lr_0 = 1.9545e-04
Loss = 6.7977e-03, PNorm = 182.6951, GNorm = 0.1936, lr_0 = 1.9532e-04
Loss = 6.2950e-03, PNorm = 182.6976, GNorm = 0.1822, lr_0 = 1.9518e-04
Loss = 3.1023e-03, PNorm = 182.7022, GNorm = 0.1108, lr_0 = 1.9505e-04
Loss = 6.4618e-03, PNorm = 182.7058, GNorm = 0.1727, lr_0 = 1.9492e-04
Loss = 5.4203e-03, PNorm = 182.7122, GNorm = 0.1168, lr_0 = 1.9478e-04
Loss = 5.3958e-03, PNorm = 182.7177, GNorm = 0.2248, lr_0 = 1.9465e-04
Loss = 7.9592e-03, PNorm = 182.7227, GNorm = 0.1114, lr_0 = 1.9452e-04
Loss = 6.7347e-03, PNorm = 182.7260, GNorm = 0.1569, lr_0 = 1.9438e-04
Loss = 4.9409e-03, PNorm = 182.7305, GNorm = 0.1780, lr_0 = 1.9425e-04
Loss = 5.8090e-03, PNorm = 182.7334, GNorm = 0.1096, lr_0 = 1.9412e-04
Loss = 5.9430e-03, PNorm = 182.7389, GNorm = 0.0745, lr_0 = 1.9398e-04
Loss = 4.9481e-03, PNorm = 182.7466, GNorm = 0.2681, lr_0 = 1.9385e-04
Loss = 3.1963e-03, PNorm = 182.7529, GNorm = 0.1137, lr_0 = 1.9372e-04
Loss = 3.4641e-03, PNorm = 182.7589, GNorm = 0.1586, lr_0 = 1.9359e-04
Loss = 5.3479e-03, PNorm = 182.7644, GNorm = 0.2540, lr_0 = 1.9345e-04
Loss = 2.2374e-02, PNorm = 182.7706, GNorm = 0.1937, lr_0 = 1.9332e-04
Loss = 7.5384e-03, PNorm = 182.7749, GNorm = 0.1277, lr_0 = 1.9319e-04
Loss = 5.2645e-03, PNorm = 182.7777, GNorm = 0.1645, lr_0 = 1.9306e-04
Validation mae = 0.121231
Epoch 22
Loss = 4.1391e-03, PNorm = 182.7810, GNorm = 0.1174, lr_0 = 1.9292e-04
Loss = 4.5328e-03, PNorm = 182.7828, GNorm = 0.1132, lr_0 = 1.9279e-04
Loss = 5.4834e-03, PNorm = 182.7853, GNorm = 0.1661, lr_0 = 1.9266e-04
Loss = 5.3557e-03, PNorm = 182.7883, GNorm = 0.3387, lr_0 = 1.9253e-04
Loss = 3.7480e-03, PNorm = 182.7927, GNorm = 0.1555, lr_0 = 1.9240e-04
Loss = 4.0739e-03, PNorm = 182.7957, GNorm = 0.0931, lr_0 = 1.9226e-04
Loss = 3.5973e-03, PNorm = 182.7991, GNorm = 0.1713, lr_0 = 1.9213e-04
Loss = 6.0306e-03, PNorm = 182.8027, GNorm = 0.1609, lr_0 = 1.9200e-04
Loss = 6.7665e-03, PNorm = 182.8073, GNorm = 0.1585, lr_0 = 1.9187e-04
Loss = 6.2085e-03, PNorm = 182.8105, GNorm = 0.1533, lr_0 = 1.9174e-04
Loss = 4.0166e-03, PNorm = 182.8143, GNorm = 0.1403, lr_0 = 1.9161e-04
Loss = 4.2547e-03, PNorm = 182.8176, GNorm = 0.2197, lr_0 = 1.9148e-04
Loss = 4.7290e-03, PNorm = 182.8220, GNorm = 0.1159, lr_0 = 1.9134e-04
Loss = 3.4138e-03, PNorm = 182.8241, GNorm = 0.1972, lr_0 = 1.9121e-04
Loss = 6.5101e-03, PNorm = 182.8281, GNorm = 0.0951, lr_0 = 1.9108e-04
Loss = 3.4084e-03, PNorm = 182.8329, GNorm = 0.1342, lr_0 = 1.9095e-04
Loss = 3.9287e-03, PNorm = 182.8382, GNorm = 0.0981, lr_0 = 1.9082e-04
Loss = 5.2881e-03, PNorm = 182.8422, GNorm = 0.0857, lr_0 = 1.9069e-04
Loss = 3.3548e-03, PNorm = 182.8456, GNorm = 0.0829, lr_0 = 1.9056e-04
Loss = 5.8662e-03, PNorm = 182.8499, GNorm = 0.1964, lr_0 = 1.9043e-04
Loss = 3.6045e-03, PNorm = 182.8543, GNorm = 0.0886, lr_0 = 1.9030e-04
Loss = 2.7880e-03, PNorm = 182.8576, GNorm = 0.2216, lr_0 = 1.9017e-04
Loss = 6.4338e-03, PNorm = 182.8604, GNorm = 0.0774, lr_0 = 1.9004e-04
Loss = 8.2780e-03, PNorm = 182.8649, GNorm = 0.1310, lr_0 = 1.8991e-04
Loss = 6.8505e-03, PNorm = 182.8684, GNorm = 0.0780, lr_0 = 1.8978e-04
Loss = 2.9992e-03, PNorm = 182.8717, GNorm = 0.0862, lr_0 = 1.8965e-04
Loss = 3.7678e-03, PNorm = 182.8745, GNorm = 0.1476, lr_0 = 1.8952e-04
Loss = 4.8945e-03, PNorm = 182.8773, GNorm = 0.2032, lr_0 = 1.8939e-04
Loss = 5.5257e-03, PNorm = 182.8799, GNorm = 0.1135, lr_0 = 1.8926e-04
Loss = 6.6480e-03, PNorm = 182.8826, GNorm = 0.1481, lr_0 = 1.8913e-04
Loss = 5.4873e-03, PNorm = 182.8865, GNorm = 0.0810, lr_0 = 1.8900e-04
Loss = 7.2315e-03, PNorm = 182.8899, GNorm = 0.0876, lr_0 = 1.8887e-04
Loss = 3.4710e-03, PNorm = 182.8937, GNorm = 0.1222, lr_0 = 1.8874e-04
Loss = 5.7924e-03, PNorm = 182.8985, GNorm = 0.1127, lr_0 = 1.8861e-04
Loss = 2.9663e-03, PNorm = 182.9024, GNorm = 0.1105, lr_0 = 1.8848e-04
Loss = 3.2084e-03, PNorm = 182.9050, GNorm = 0.1377, lr_0 = 1.8835e-04
Loss = 2.9734e-03, PNorm = 182.9080, GNorm = 0.1846, lr_0 = 1.8822e-04
Loss = 4.1140e-03, PNorm = 182.9117, GNorm = 0.2799, lr_0 = 1.8809e-04
Loss = 6.8778e-03, PNorm = 182.9135, GNorm = 0.2947, lr_0 = 1.8797e-04
Loss = 3.7482e-03, PNorm = 182.9147, GNorm = 0.2371, lr_0 = 1.8784e-04
Loss = 7.6713e-03, PNorm = 182.9201, GNorm = 0.3158, lr_0 = 1.8771e-04
Loss = 7.3123e-03, PNorm = 182.9241, GNorm = 0.2337, lr_0 = 1.8758e-04
Loss = 3.0234e-03, PNorm = 182.9272, GNorm = 0.1591, lr_0 = 1.8745e-04
Loss = 3.3430e-03, PNorm = 182.9293, GNorm = 0.1197, lr_0 = 1.8732e-04
Loss = 5.0137e-03, PNorm = 182.9337, GNorm = 0.0794, lr_0 = 1.8719e-04
Loss = 3.6859e-03, PNorm = 182.9377, GNorm = 0.0838, lr_0 = 1.8707e-04
Loss = 4.2901e-03, PNorm = 182.9391, GNorm = 0.0699, lr_0 = 1.8694e-04
Loss = 4.4858e-03, PNorm = 182.9419, GNorm = 0.1322, lr_0 = 1.8681e-04
Loss = 3.4746e-03, PNorm = 182.9452, GNorm = 0.1440, lr_0 = 1.8668e-04
Loss = 3.1482e-03, PNorm = 182.9480, GNorm = 0.0839, lr_0 = 1.8655e-04
Loss = 3.4044e-03, PNorm = 182.9523, GNorm = 0.1356, lr_0 = 1.8643e-04
Loss = 8.5525e-03, PNorm = 182.9556, GNorm = 0.1106, lr_0 = 1.8630e-04
Loss = 1.0076e-02, PNorm = 182.9610, GNorm = 0.0866, lr_0 = 1.8617e-04
Loss = 2.9423e-03, PNorm = 182.9659, GNorm = 0.0729, lr_0 = 1.8604e-04
Loss = 5.5152e-03, PNorm = 182.9697, GNorm = 0.1765, lr_0 = 1.8592e-04
Loss = 3.7423e-03, PNorm = 182.9727, GNorm = 0.1023, lr_0 = 1.8579e-04
Loss = 4.3413e-03, PNorm = 182.9773, GNorm = 0.2865, lr_0 = 1.8566e-04
Loss = 4.8714e-03, PNorm = 182.9812, GNorm = 0.0903, lr_0 = 1.8553e-04
Loss = 3.3522e-03, PNorm = 182.9866, GNorm = 0.0753, lr_0 = 1.8541e-04
Loss = 4.9614e-03, PNorm = 182.9924, GNorm = 0.0783, lr_0 = 1.8528e-04
Loss = 3.4642e-03, PNorm = 182.9957, GNorm = 0.0626, lr_0 = 1.8515e-04
Loss = 4.4653e-03, PNorm = 182.9987, GNorm = 0.0862, lr_0 = 1.8503e-04
Loss = 5.2365e-03, PNorm = 183.0020, GNorm = 0.0828, lr_0 = 1.8490e-04
Loss = 6.2839e-03, PNorm = 183.0053, GNorm = 0.1189, lr_0 = 1.8477e-04
Loss = 3.3727e-03, PNorm = 183.0099, GNorm = 0.1594, lr_0 = 1.8465e-04
Loss = 5.6317e-03, PNorm = 183.0156, GNorm = 0.1735, lr_0 = 1.8452e-04
Loss = 6.0327e-03, PNorm = 183.0195, GNorm = 0.5542, lr_0 = 1.8439e-04
Loss = 6.1174e-03, PNorm = 183.0233, GNorm = 0.5797, lr_0 = 1.8427e-04
Loss = 2.8707e-03, PNorm = 183.0265, GNorm = 0.1279, lr_0 = 1.8414e-04
Loss = 2.9148e-03, PNorm = 183.0295, GNorm = 0.0807, lr_0 = 1.8401e-04
Loss = 8.6298e-03, PNorm = 183.0357, GNorm = 0.0942, lr_0 = 1.8389e-04
Loss = 5.3812e-03, PNorm = 183.0403, GNorm = 0.1407, lr_0 = 1.8376e-04
Loss = 3.3108e-03, PNorm = 183.0436, GNorm = 0.1059, lr_0 = 1.8364e-04
Loss = 2.6608e-03, PNorm = 183.0463, GNorm = 0.2256, lr_0 = 1.8351e-04
Loss = 4.8381e-03, PNorm = 183.0504, GNorm = 0.1014, lr_0 = 1.8338e-04
Loss = 2.2860e-02, PNorm = 183.0550, GNorm = 0.2048, lr_0 = 1.8326e-04
Loss = 8.3992e-03, PNorm = 183.0565, GNorm = 0.1481, lr_0 = 1.8313e-04
Loss = 4.5650e-03, PNorm = 183.0601, GNorm = 0.1024, lr_0 = 1.8301e-04
Loss = 1.1817e-02, PNorm = 183.0617, GNorm = 0.1462, lr_0 = 1.8288e-04
Loss = 3.4884e-03, PNorm = 183.0646, GNorm = 0.1847, lr_0 = 1.8276e-04
Loss = 7.7717e-03, PNorm = 183.0693, GNorm = 0.3297, lr_0 = 1.8263e-04
Loss = 3.3355e-03, PNorm = 183.0741, GNorm = 0.1531, lr_0 = 1.8251e-04
Loss = 4.4248e-03, PNorm = 183.0791, GNorm = 0.3800, lr_0 = 1.8238e-04
Loss = 6.6513e-03, PNorm = 183.0850, GNorm = 0.0942, lr_0 = 1.8226e-04
Loss = 4.9646e-03, PNorm = 183.0899, GNorm = 0.2036, lr_0 = 1.8213e-04
Loss = 6.9284e-03, PNorm = 183.0930, GNorm = 0.0925, lr_0 = 1.8201e-04
Loss = 3.4983e-03, PNorm = 183.0972, GNorm = 0.0822, lr_0 = 1.8188e-04
Loss = 8.0348e-03, PNorm = 183.1012, GNorm = 0.3105, lr_0 = 1.8176e-04
Loss = 8.7513e-03, PNorm = 183.1058, GNorm = 0.5598, lr_0 = 1.8163e-04
Loss = 2.9817e-03, PNorm = 183.1105, GNorm = 0.0930, lr_0 = 1.8151e-04
Loss = 5.3904e-03, PNorm = 183.1148, GNorm = 0.3022, lr_0 = 1.8138e-04
Loss = 5.7490e-03, PNorm = 183.1192, GNorm = 0.1404, lr_0 = 1.8126e-04
Loss = 5.6322e-03, PNorm = 183.1245, GNorm = 0.0775, lr_0 = 1.8114e-04
Loss = 2.6283e-03, PNorm = 183.1288, GNorm = 0.1370, lr_0 = 1.8101e-04
Loss = 3.4891e-03, PNorm = 183.1305, GNorm = 0.0814, lr_0 = 1.8089e-04
Loss = 6.3605e-03, PNorm = 183.1328, GNorm = 0.1411, lr_0 = 1.8076e-04
Loss = 3.3450e-03, PNorm = 183.1382, GNorm = 0.0946, lr_0 = 1.8064e-04
Loss = 2.7486e-03, PNorm = 183.1422, GNorm = 0.1838, lr_0 = 1.8052e-04
Loss = 6.0260e-03, PNorm = 183.1463, GNorm = 0.1028, lr_0 = 1.8039e-04
Loss = 6.2235e-03, PNorm = 183.1510, GNorm = 0.0810, lr_0 = 1.8027e-04
Loss = 2.9872e-03, PNorm = 183.1547, GNorm = 0.0940, lr_0 = 1.8015e-04
Loss = 6.1593e-03, PNorm = 183.1587, GNorm = 0.2321, lr_0 = 1.8002e-04
Loss = 4.5103e-03, PNorm = 183.1618, GNorm = 0.0832, lr_0 = 1.7990e-04
Loss = 6.4649e-03, PNorm = 183.1663, GNorm = 0.1320, lr_0 = 1.7978e-04
Loss = 7.1361e-03, PNorm = 183.1687, GNorm = 0.1604, lr_0 = 1.7965e-04
Loss = 3.4808e-03, PNorm = 183.1732, GNorm = 0.2590, lr_0 = 1.7953e-04
Loss = 6.2247e-03, PNorm = 183.1793, GNorm = 0.2446, lr_0 = 1.7941e-04
Loss = 5.8365e-03, PNorm = 183.1831, GNorm = 0.1036, lr_0 = 1.7928e-04
Loss = 5.0490e-03, PNorm = 183.1893, GNorm = 0.1889, lr_0 = 1.7916e-04
Loss = 4.8721e-03, PNorm = 183.1935, GNorm = 0.1353, lr_0 = 1.7904e-04
Loss = 4.5797e-03, PNorm = 183.1960, GNorm = 0.1388, lr_0 = 1.7892e-04
Loss = 4.9812e-03, PNorm = 183.2007, GNorm = 0.2876, lr_0 = 1.7879e-04
Loss = 3.8906e-03, PNorm = 183.2048, GNorm = 0.2187, lr_0 = 1.7867e-04
Loss = 3.5377e-03, PNorm = 183.2072, GNorm = 0.1037, lr_0 = 1.7855e-04
Loss = 8.9230e-03, PNorm = 183.2113, GNorm = 0.2489, lr_0 = 1.7843e-04
Loss = 4.0389e-03, PNorm = 183.2163, GNorm = 0.0768, lr_0 = 1.7830e-04
Loss = 1.2977e-02, PNorm = 183.2193, GNorm = 0.2191, lr_0 = 1.7818e-04
Loss = 2.7532e-03, PNorm = 183.2239, GNorm = 0.0745, lr_0 = 1.7806e-04
Loss = 2.8752e-03, PNorm = 183.2267, GNorm = 0.1318, lr_0 = 1.7794e-04
Loss = 4.9840e-03, PNorm = 183.2295, GNorm = 0.1432, lr_0 = 1.7782e-04
Validation mae = 0.121207
Epoch 23
Loss = 6.6387e-03, PNorm = 183.2304, GNorm = 0.2182, lr_0 = 1.7769e-04
Loss = 5.5484e-03, PNorm = 183.2312, GNorm = 0.1027, lr_0 = 1.7757e-04
Loss = 3.2653e-03, PNorm = 183.2329, GNorm = 0.5170, lr_0 = 1.7745e-04
Loss = 3.3854e-03, PNorm = 183.2360, GNorm = 0.1271, lr_0 = 1.7733e-04
Loss = 4.0330e-03, PNorm = 183.2391, GNorm = 0.1819, lr_0 = 1.7721e-04
Loss = 7.9006e-03, PNorm = 183.2432, GNorm = 0.2072, lr_0 = 1.7709e-04
Loss = 4.6511e-03, PNorm = 183.2464, GNorm = 0.0665, lr_0 = 1.7696e-04
Loss = 3.9473e-03, PNorm = 183.2479, GNorm = 0.0974, lr_0 = 1.7684e-04
Loss = 9.6696e-03, PNorm = 183.2515, GNorm = 0.1472, lr_0 = 1.7672e-04
Loss = 2.3168e-03, PNorm = 183.2564, GNorm = 0.1293, lr_0 = 1.7660e-04
Loss = 4.3171e-03, PNorm = 183.2608, GNorm = 0.1180, lr_0 = 1.7648e-04
Loss = 3.4655e-03, PNorm = 183.2640, GNorm = 0.2180, lr_0 = 1.7636e-04
Loss = 4.5503e-03, PNorm = 183.2672, GNorm = 0.0795, lr_0 = 1.7624e-04
Loss = 5.2090e-03, PNorm = 183.2705, GNorm = 0.0869, lr_0 = 1.7612e-04
Loss = 1.1069e-02, PNorm = 183.2731, GNorm = 0.1673, lr_0 = 1.7600e-04
Loss = 5.1776e-03, PNorm = 183.2776, GNorm = 0.1164, lr_0 = 1.7588e-04
Loss = 4.4684e-03, PNorm = 183.2810, GNorm = 0.2235, lr_0 = 1.7576e-04
Loss = 4.7348e-03, PNorm = 183.2843, GNorm = 0.1689, lr_0 = 1.7564e-04
Loss = 3.1789e-03, PNorm = 183.2899, GNorm = 0.1979, lr_0 = 1.7552e-04
Loss = 3.8519e-03, PNorm = 183.2945, GNorm = 0.0664, lr_0 = 1.7540e-04
Loss = 2.7900e-03, PNorm = 183.3000, GNorm = 0.0690, lr_0 = 1.7528e-04
Loss = 2.2698e-03, PNorm = 183.3035, GNorm = 0.1036, lr_0 = 1.7516e-04
Loss = 4.7168e-03, PNorm = 183.3064, GNorm = 0.0849, lr_0 = 1.7504e-04
Loss = 2.0157e-03, PNorm = 183.3084, GNorm = 0.0919, lr_0 = 1.7492e-04
Loss = 9.1834e-03, PNorm = 183.3099, GNorm = 0.6760, lr_0 = 1.7480e-04
Loss = 3.9406e-03, PNorm = 183.3125, GNorm = 0.1389, lr_0 = 1.7468e-04
Loss = 5.8460e-03, PNorm = 183.3157, GNorm = 0.1189, lr_0 = 1.7456e-04
Loss = 2.5626e-03, PNorm = 183.3199, GNorm = 0.1600, lr_0 = 1.7444e-04
Loss = 2.6852e-03, PNorm = 183.3240, GNorm = 0.1569, lr_0 = 1.7432e-04
Loss = 5.0554e-03, PNorm = 183.3257, GNorm = 0.0939, lr_0 = 1.7420e-04
Loss = 4.4825e-03, PNorm = 183.3277, GNorm = 0.2354, lr_0 = 1.7408e-04
Loss = 3.0493e-03, PNorm = 183.3312, GNorm = 0.1418, lr_0 = 1.7396e-04
Loss = 9.7693e-03, PNorm = 183.3334, GNorm = 0.1015, lr_0 = 1.7384e-04
Loss = 3.5739e-03, PNorm = 183.3339, GNorm = 0.1226, lr_0 = 1.7372e-04
Loss = 4.8427e-03, PNorm = 183.3356, GNorm = 0.2543, lr_0 = 1.7360e-04
Loss = 6.5578e-03, PNorm = 183.3398, GNorm = 0.0781, lr_0 = 1.7348e-04
Loss = 5.0738e-03, PNorm = 183.3424, GNorm = 0.1399, lr_0 = 1.7336e-04
Loss = 2.2670e-03, PNorm = 183.3445, GNorm = 0.0859, lr_0 = 1.7325e-04
Loss = 6.0061e-03, PNorm = 183.3478, GNorm = 0.2550, lr_0 = 1.7313e-04
Loss = 3.0243e-03, PNorm = 183.3529, GNorm = 0.2546, lr_0 = 1.7301e-04
Loss = 3.6542e-03, PNorm = 183.3563, GNorm = 0.1445, lr_0 = 1.7289e-04
Loss = 5.3319e-03, PNorm = 183.3581, GNorm = 0.1780, lr_0 = 1.7277e-04
Loss = 3.2347e-03, PNorm = 183.3601, GNorm = 0.1262, lr_0 = 1.7265e-04
Loss = 2.4171e-03, PNorm = 183.3638, GNorm = 0.1111, lr_0 = 1.7253e-04
Loss = 2.7803e-03, PNorm = 183.3682, GNorm = 0.2554, lr_0 = 1.7242e-04
Loss = 4.6015e-03, PNorm = 183.3728, GNorm = 0.2022, lr_0 = 1.7230e-04
Loss = 4.5149e-03, PNorm = 183.3764, GNorm = 0.1216, lr_0 = 1.7218e-04
Loss = 3.3145e-03, PNorm = 183.3803, GNorm = 0.1864, lr_0 = 1.7206e-04
Loss = 3.6366e-03, PNorm = 183.3851, GNorm = 0.0850, lr_0 = 1.7194e-04
Loss = 2.7567e-03, PNorm = 183.3893, GNorm = 0.1007, lr_0 = 1.7183e-04
Loss = 4.8914e-03, PNorm = 183.3924, GNorm = 0.1377, lr_0 = 1.7171e-04
Loss = 3.6643e-03, PNorm = 183.3947, GNorm = 0.1250, lr_0 = 1.7159e-04
Loss = 2.8876e-03, PNorm = 183.3962, GNorm = 0.1597, lr_0 = 1.7147e-04
Loss = 4.7392e-03, PNorm = 183.3985, GNorm = 0.0966, lr_0 = 1.7136e-04
Loss = 4.6636e-03, PNorm = 183.4013, GNorm = 0.3624, lr_0 = 1.7124e-04
Loss = 5.3751e-03, PNorm = 183.4048, GNorm = 0.6478, lr_0 = 1.7112e-04
Loss = 2.6150e-03, PNorm = 183.4073, GNorm = 0.0536, lr_0 = 1.7100e-04
Loss = 3.5853e-03, PNorm = 183.4117, GNorm = 0.0551, lr_0 = 1.7089e-04
Loss = 3.4037e-03, PNorm = 183.4138, GNorm = 0.1661, lr_0 = 1.7077e-04
Loss = 4.3300e-03, PNorm = 183.4172, GNorm = 0.0835, lr_0 = 1.7065e-04
Loss = 5.4808e-03, PNorm = 183.4206, GNorm = 0.0723, lr_0 = 1.7054e-04
Loss = 4.6015e-03, PNorm = 183.4244, GNorm = 0.1493, lr_0 = 1.7042e-04
Loss = 8.1591e-03, PNorm = 183.4288, GNorm = 0.1460, lr_0 = 1.7030e-04
Loss = 3.8824e-03, PNorm = 183.4315, GNorm = 0.0698, lr_0 = 1.7019e-04
Loss = 3.8237e-03, PNorm = 183.4349, GNorm = 0.1117, lr_0 = 1.7007e-04
Loss = 4.6922e-03, PNorm = 183.4382, GNorm = 0.1045, lr_0 = 1.6995e-04
Loss = 4.3028e-03, PNorm = 183.4403, GNorm = 0.1548, lr_0 = 1.6984e-04
Loss = 8.0560e-03, PNorm = 183.4419, GNorm = 0.2479, lr_0 = 1.6972e-04
Loss = 3.6617e-03, PNorm = 183.4444, GNorm = 0.1609, lr_0 = 1.6960e-04
Loss = 7.1845e-03, PNorm = 183.4481, GNorm = 0.1988, lr_0 = 1.6949e-04
Loss = 6.5686e-03, PNorm = 183.4492, GNorm = 0.1518, lr_0 = 1.6937e-04
Loss = 3.3356e-03, PNorm = 183.4513, GNorm = 0.1190, lr_0 = 1.6926e-04
Loss = 8.1544e-03, PNorm = 183.4564, GNorm = 0.1106, lr_0 = 1.6914e-04
Loss = 3.1267e-03, PNorm = 183.4610, GNorm = 0.1784, lr_0 = 1.6902e-04
Loss = 3.7010e-03, PNorm = 183.4654, GNorm = 0.1235, lr_0 = 1.6891e-04
Loss = 7.6794e-03, PNorm = 183.4703, GNorm = 0.3785, lr_0 = 1.6879e-04
Loss = 3.4871e-03, PNorm = 183.4734, GNorm = 0.2040, lr_0 = 1.6868e-04
Loss = 2.7538e-03, PNorm = 183.4765, GNorm = 0.1297, lr_0 = 1.6856e-04
Loss = 3.2899e-03, PNorm = 183.4797, GNorm = 0.1371, lr_0 = 1.6845e-04
Loss = 3.1917e-03, PNorm = 183.4815, GNorm = 0.1296, lr_0 = 1.6833e-04
Loss = 4.3632e-03, PNorm = 183.4825, GNorm = 0.1639, lr_0 = 1.6821e-04
Loss = 3.7508e-03, PNorm = 183.4853, GNorm = 0.1764, lr_0 = 1.6810e-04
Loss = 3.2519e-03, PNorm = 183.4882, GNorm = 0.1214, lr_0 = 1.6798e-04
Loss = 2.5667e-03, PNorm = 183.4912, GNorm = 0.0696, lr_0 = 1.6787e-04
Loss = 9.3438e-03, PNorm = 183.4937, GNorm = 1.1395, lr_0 = 1.6775e-04
Loss = 2.4983e-03, PNorm = 183.4944, GNorm = 0.1470, lr_0 = 1.6764e-04
Loss = 4.0656e-03, PNorm = 183.4972, GNorm = 0.1072, lr_0 = 1.6752e-04
Loss = 7.2200e-03, PNorm = 183.4997, GNorm = 0.0758, lr_0 = 1.6741e-04
Loss = 5.6396e-03, PNorm = 183.5036, GNorm = 0.1193, lr_0 = 1.6729e-04
Loss = 4.4091e-03, PNorm = 183.5072, GNorm = 0.1234, lr_0 = 1.6718e-04
Loss = 5.1858e-03, PNorm = 183.5097, GNorm = 0.0523, lr_0 = 1.6707e-04
Loss = 3.8788e-03, PNorm = 183.5146, GNorm = 0.0970, lr_0 = 1.6695e-04
Loss = 4.4900e-03, PNorm = 183.5179, GNorm = 0.1286, lr_0 = 1.6684e-04
Loss = 5.1764e-03, PNorm = 183.5205, GNorm = 0.0698, lr_0 = 1.6672e-04
Loss = 1.5226e-02, PNorm = 183.5234, GNorm = 3.0334, lr_0 = 1.6661e-04
Loss = 3.7137e-03, PNorm = 183.5287, GNorm = 0.1073, lr_0 = 1.6649e-04
Loss = 6.2187e-03, PNorm = 183.5313, GNorm = 0.2075, lr_0 = 1.6638e-04
Loss = 7.7225e-03, PNorm = 183.5358, GNorm = 0.1308, lr_0 = 1.6627e-04
Loss = 6.9981e-03, PNorm = 183.5373, GNorm = 0.0777, lr_0 = 1.6615e-04
Loss = 2.8946e-03, PNorm = 183.5406, GNorm = 0.1337, lr_0 = 1.6604e-04
Loss = 3.4852e-03, PNorm = 183.5446, GNorm = 0.1167, lr_0 = 1.6592e-04
Loss = 3.9687e-03, PNorm = 183.5457, GNorm = 0.1893, lr_0 = 1.6581e-04
Loss = 5.1582e-03, PNorm = 183.5485, GNorm = 0.2215, lr_0 = 1.6570e-04
Loss = 3.8259e-03, PNorm = 183.5513, GNorm = 0.1611, lr_0 = 1.6558e-04
Loss = 2.2524e-03, PNorm = 183.5544, GNorm = 0.1521, lr_0 = 1.6547e-04
Loss = 2.1149e-03, PNorm = 183.5579, GNorm = 0.0627, lr_0 = 1.6536e-04
Loss = 3.9345e-03, PNorm = 183.5604, GNorm = 0.0991, lr_0 = 1.6524e-04
Loss = 2.8848e-03, PNorm = 183.5637, GNorm = 0.1432, lr_0 = 1.6513e-04
Loss = 5.0151e-03, PNorm = 183.5656, GNorm = 0.1307, lr_0 = 1.6502e-04
Loss = 2.8142e-03, PNorm = 183.5674, GNorm = 0.1843, lr_0 = 1.6490e-04
Loss = 3.9505e-03, PNorm = 183.5703, GNorm = 0.1751, lr_0 = 1.6479e-04
Loss = 3.1188e-03, PNorm = 183.5728, GNorm = 0.1876, lr_0 = 1.6468e-04
Loss = 4.8266e-03, PNorm = 183.5758, GNorm = 0.1519, lr_0 = 1.6457e-04
Loss = 4.3297e-03, PNorm = 183.5787, GNorm = 0.1343, lr_0 = 1.6445e-04
Loss = 1.0149e-02, PNorm = 183.5815, GNorm = 0.2484, lr_0 = 1.6434e-04
Loss = 3.7311e-03, PNorm = 183.5828, GNorm = 0.0823, lr_0 = 1.6423e-04
Loss = 2.8910e-03, PNorm = 183.5855, GNorm = 0.1828, lr_0 = 1.6412e-04
Loss = 3.0643e-03, PNorm = 183.5879, GNorm = 0.1534, lr_0 = 1.6400e-04
Loss = 2.5090e-03, PNorm = 183.5920, GNorm = 0.1173, lr_0 = 1.6389e-04
Loss = 3.6191e-03, PNorm = 183.5971, GNorm = 0.1319, lr_0 = 1.6378e-04
Validation mae = 0.121351
Epoch 24
Loss = 2.3843e-03, PNorm = 183.6009, GNorm = 0.0735, lr_0 = 1.6367e-04
Loss = 3.8775e-03, PNorm = 183.6037, GNorm = 0.0899, lr_0 = 1.6355e-04
Loss = 3.5721e-03, PNorm = 183.6051, GNorm = 0.0526, lr_0 = 1.6344e-04
Loss = 2.1569e-03, PNorm = 183.6062, GNorm = 0.1273, lr_0 = 1.6333e-04
Loss = 3.1516e-03, PNorm = 183.6098, GNorm = 0.1777, lr_0 = 1.6322e-04
Loss = 2.4676e-03, PNorm = 183.6116, GNorm = 0.2211, lr_0 = 1.6311e-04
Loss = 3.9125e-03, PNorm = 183.6140, GNorm = 0.1796, lr_0 = 1.6299e-04
Loss = 5.0531e-03, PNorm = 183.6160, GNorm = 0.1939, lr_0 = 1.6288e-04
Loss = 3.9573e-03, PNorm = 183.6196, GNorm = 0.1665, lr_0 = 1.6277e-04
Loss = 3.0522e-03, PNorm = 183.6223, GNorm = 0.0826, lr_0 = 1.6266e-04
Loss = 2.6947e-03, PNorm = 183.6246, GNorm = 0.1157, lr_0 = 1.6255e-04
Loss = 3.9580e-03, PNorm = 183.6286, GNorm = 0.0652, lr_0 = 1.6244e-04
Loss = 3.2711e-03, PNorm = 183.6317, GNorm = 0.0639, lr_0 = 1.6233e-04
Loss = 7.4313e-03, PNorm = 183.6352, GNorm = 0.2017, lr_0 = 1.6221e-04
Loss = 3.3123e-03, PNorm = 183.6373, GNorm = 0.1305, lr_0 = 1.6210e-04
Loss = 2.8346e-03, PNorm = 183.6398, GNorm = 0.1033, lr_0 = 1.6199e-04
Loss = 4.4743e-03, PNorm = 183.6440, GNorm = 0.0897, lr_0 = 1.6188e-04
Loss = 4.6909e-03, PNorm = 183.6461, GNorm = 0.0804, lr_0 = 1.6177e-04
Loss = 2.1420e-03, PNorm = 183.6491, GNorm = 0.2142, lr_0 = 1.6166e-04
Loss = 6.3408e-03, PNorm = 183.6533, GNorm = 0.1595, lr_0 = 1.6155e-04
Loss = 3.4444e-03, PNorm = 183.6563, GNorm = 0.2661, lr_0 = 1.6144e-04
Loss = 4.7738e-03, PNorm = 183.6610, GNorm = 0.2380, lr_0 = 1.6133e-04
Loss = 4.6418e-03, PNorm = 183.6638, GNorm = 0.2192, lr_0 = 1.6122e-04
Loss = 3.6725e-03, PNorm = 183.6662, GNorm = 0.0881, lr_0 = 1.6111e-04
Loss = 3.4432e-03, PNorm = 183.6699, GNorm = 0.1495, lr_0 = 1.6100e-04
Loss = 5.6447e-03, PNorm = 183.6722, GNorm = 0.2194, lr_0 = 1.6089e-04
Loss = 3.6662e-03, PNorm = 183.6730, GNorm = 0.0862, lr_0 = 1.6078e-04
Loss = 2.3692e-03, PNorm = 183.6774, GNorm = 0.0904, lr_0 = 1.6067e-04
Loss = 2.6652e-03, PNorm = 183.6818, GNorm = 0.4342, lr_0 = 1.6056e-04
Loss = 2.8695e-03, PNorm = 183.6868, GNorm = 0.2356, lr_0 = 1.6045e-04
Loss = 4.2024e-03, PNorm = 183.6890, GNorm = 0.0883, lr_0 = 1.6034e-04
Loss = 4.4353e-03, PNorm = 183.6918, GNorm = 0.0913, lr_0 = 1.6023e-04
Loss = 5.5977e-03, PNorm = 183.6944, GNorm = 0.1098, lr_0 = 1.6012e-04
Loss = 2.8520e-03, PNorm = 183.6961, GNorm = 0.1403, lr_0 = 1.6001e-04
Loss = 2.6654e-03, PNorm = 183.6992, GNorm = 0.0708, lr_0 = 1.5990e-04
Loss = 5.1119e-03, PNorm = 183.7025, GNorm = 0.1205, lr_0 = 1.5979e-04
Loss = 2.1985e-03, PNorm = 183.7059, GNorm = 0.0675, lr_0 = 1.5968e-04
Loss = 1.9678e-03, PNorm = 183.7082, GNorm = 0.0881, lr_0 = 1.5957e-04
Loss = 3.4207e-03, PNorm = 183.7101, GNorm = 0.1945, lr_0 = 1.5946e-04
Loss = 2.3506e-03, PNorm = 183.7125, GNorm = 0.0632, lr_0 = 1.5935e-04
Loss = 4.0852e-03, PNorm = 183.7138, GNorm = 0.1811, lr_0 = 1.5924e-04
Loss = 3.1880e-03, PNorm = 183.7158, GNorm = 0.1463, lr_0 = 1.5913e-04
Loss = 3.3418e-03, PNorm = 183.7193, GNorm = 0.1409, lr_0 = 1.5902e-04
Loss = 2.6506e-03, PNorm = 183.7216, GNorm = 0.1344, lr_0 = 1.5891e-04
Loss = 4.4943e-03, PNorm = 183.7230, GNorm = 0.2737, lr_0 = 1.5880e-04
Loss = 1.8603e-03, PNorm = 183.7263, GNorm = 0.0743, lr_0 = 1.5870e-04
Loss = 2.5668e-03, PNorm = 183.7295, GNorm = 0.1557, lr_0 = 1.5859e-04
Loss = 3.6242e-03, PNorm = 183.7335, GNorm = 0.1340, lr_0 = 1.5848e-04
Loss = 3.0546e-03, PNorm = 183.7357, GNorm = 0.2104, lr_0 = 1.5837e-04
Loss = 5.6991e-03, PNorm = 183.7375, GNorm = 0.1146, lr_0 = 1.5826e-04
Loss = 2.4804e-03, PNorm = 183.7396, GNorm = 0.1817, lr_0 = 1.5815e-04
Loss = 7.8502e-03, PNorm = 183.7431, GNorm = 0.1803, lr_0 = 1.5804e-04
Loss = 3.0461e-03, PNorm = 183.7461, GNorm = 0.4131, lr_0 = 1.5794e-04
Loss = 6.8114e-03, PNorm = 183.7488, GNorm = 0.0683, lr_0 = 1.5783e-04
Loss = 2.4549e-03, PNorm = 183.7519, GNorm = 0.1478, lr_0 = 1.5772e-04
Loss = 7.7296e-03, PNorm = 183.7539, GNorm = 0.1243, lr_0 = 1.5761e-04
Loss = 6.8119e-03, PNorm = 183.7557, GNorm = 0.7716, lr_0 = 1.5750e-04
Loss = 4.6937e-03, PNorm = 183.7563, GNorm = 0.1418, lr_0 = 1.5740e-04
Loss = 9.1579e-03, PNorm = 183.7591, GNorm = 0.1034, lr_0 = 1.5729e-04
Loss = 2.1572e-03, PNorm = 183.7618, GNorm = 0.1085, lr_0 = 1.5718e-04
Loss = 4.7286e-03, PNorm = 183.7637, GNorm = 0.0834, lr_0 = 1.5707e-04
Loss = 4.7591e-03, PNorm = 183.7669, GNorm = 0.1123, lr_0 = 1.5697e-04
Loss = 3.7690e-03, PNorm = 183.7691, GNorm = 0.0548, lr_0 = 1.5686e-04
Loss = 4.8558e-03, PNorm = 183.7706, GNorm = 0.2185, lr_0 = 1.5675e-04
Loss = 5.6327e-03, PNorm = 183.7733, GNorm = 0.1112, lr_0 = 1.5664e-04
Loss = 5.6210e-03, PNorm = 183.7761, GNorm = 0.0680, lr_0 = 1.5654e-04
Loss = 3.0101e-03, PNorm = 183.7788, GNorm = 0.1826, lr_0 = 1.5643e-04
Loss = 2.1577e-03, PNorm = 183.7812, GNorm = 0.0670, lr_0 = 1.5632e-04
Loss = 3.6964e-03, PNorm = 183.7831, GNorm = 0.0695, lr_0 = 1.5621e-04
Loss = 3.5479e-03, PNorm = 183.7854, GNorm = 0.2754, lr_0 = 1.5611e-04
Loss = 4.2386e-03, PNorm = 183.7889, GNorm = 0.1889, lr_0 = 1.5600e-04
Loss = 4.1064e-03, PNorm = 183.7915, GNorm = 0.0912, lr_0 = 1.5589e-04
Loss = 4.6266e-03, PNorm = 183.7938, GNorm = 0.0995, lr_0 = 1.5579e-04
Loss = 3.7291e-03, PNorm = 183.7951, GNorm = 0.0509, lr_0 = 1.5568e-04
Loss = 2.0360e-03, PNorm = 183.7973, GNorm = 0.0569, lr_0 = 1.5557e-04
Loss = 5.1041e-03, PNorm = 183.8000, GNorm = 0.1075, lr_0 = 1.5547e-04
Loss = 3.6808e-03, PNorm = 183.8023, GNorm = 0.1332, lr_0 = 1.5536e-04
Loss = 3.0444e-03, PNorm = 183.8049, GNorm = 0.0699, lr_0 = 1.5525e-04
Loss = 3.8998e-03, PNorm = 183.8060, GNorm = 0.2926, lr_0 = 1.5515e-04
Loss = 4.4169e-03, PNorm = 183.8092, GNorm = 0.1562, lr_0 = 1.5504e-04
Loss = 4.4395e-03, PNorm = 183.8119, GNorm = 0.1214, lr_0 = 1.5493e-04
Loss = 9.7470e-03, PNorm = 183.8140, GNorm = 0.1452, lr_0 = 1.5483e-04
Loss = 4.4189e-03, PNorm = 183.8165, GNorm = 0.1066, lr_0 = 1.5472e-04
Loss = 4.2061e-03, PNorm = 183.8210, GNorm = 0.2660, lr_0 = 1.5462e-04
Loss = 2.6600e-03, PNorm = 183.8256, GNorm = 0.0762, lr_0 = 1.5451e-04
Loss = 2.4619e-03, PNorm = 183.8287, GNorm = 0.0819, lr_0 = 1.5440e-04
Loss = 2.4230e-03, PNorm = 183.8317, GNorm = 0.1060, lr_0 = 1.5430e-04
Loss = 5.8955e-03, PNorm = 183.8358, GNorm = 0.1763, lr_0 = 1.5419e-04
Loss = 2.5173e-03, PNorm = 183.8391, GNorm = 0.1115, lr_0 = 1.5409e-04
Loss = 6.4734e-03, PNorm = 183.8428, GNorm = 0.0886, lr_0 = 1.5398e-04
Loss = 3.3511e-03, PNorm = 183.8474, GNorm = 0.1713, lr_0 = 1.5388e-04
Loss = 2.8225e-03, PNorm = 183.8500, GNorm = 0.1222, lr_0 = 1.5377e-04
Loss = 4.0241e-03, PNorm = 183.8523, GNorm = 0.1612, lr_0 = 1.5367e-04
Loss = 3.2969e-03, PNorm = 183.8552, GNorm = 0.1954, lr_0 = 1.5356e-04
Loss = 4.9774e-03, PNorm = 183.8568, GNorm = 0.3369, lr_0 = 1.5346e-04
Loss = 4.4199e-03, PNorm = 183.8593, GNorm = 0.1311, lr_0 = 1.5335e-04
Loss = 8.2457e-03, PNorm = 183.8643, GNorm = 0.2198, lr_0 = 1.5325e-04
Loss = 1.9156e-02, PNorm = 183.8696, GNorm = 0.4397, lr_0 = 1.5314e-04
Loss = 3.8652e-03, PNorm = 183.8715, GNorm = 0.1680, lr_0 = 1.5304e-04
Loss = 3.1711e-03, PNorm = 183.8745, GNorm = 0.1509, lr_0 = 1.5293e-04
Loss = 7.5259e-03, PNorm = 183.8780, GNorm = 0.1562, lr_0 = 1.5283e-04
Loss = 3.0543e-03, PNorm = 183.8789, GNorm = 0.1310, lr_0 = 1.5272e-04
Loss = 7.1425e-03, PNorm = 183.8816, GNorm = 0.0669, lr_0 = 1.5262e-04
Loss = 7.7012e-03, PNorm = 183.8838, GNorm = 0.1366, lr_0 = 1.5251e-04
Loss = 2.0507e-03, PNorm = 183.8865, GNorm = 0.1003, lr_0 = 1.5241e-04
Loss = 2.6291e-03, PNorm = 183.8879, GNorm = 0.2089, lr_0 = 1.5230e-04
Loss = 9.2326e-03, PNorm = 183.8896, GNorm = 0.5238, lr_0 = 1.5220e-04
Loss = 7.3914e-03, PNorm = 183.8927, GNorm = 0.0734, lr_0 = 1.5209e-04
Loss = 4.5981e-03, PNorm = 183.8958, GNorm = 0.0797, lr_0 = 1.5199e-04
Loss = 3.7332e-03, PNorm = 183.8982, GNorm = 0.1265, lr_0 = 1.5189e-04
Loss = 2.3699e-03, PNorm = 183.8990, GNorm = 0.0836, lr_0 = 1.5178e-04
Loss = 2.1346e-03, PNorm = 183.9017, GNorm = 0.1799, lr_0 = 1.5168e-04
Loss = 2.0929e-03, PNorm = 183.9042, GNorm = 0.0526, lr_0 = 1.5157e-04
Loss = 3.4718e-03, PNorm = 183.9053, GNorm = 0.1960, lr_0 = 1.5147e-04
Loss = 3.6534e-03, PNorm = 183.9070, GNorm = 0.0918, lr_0 = 1.5137e-04
Loss = 2.0234e-03, PNorm = 183.9087, GNorm = 0.0790, lr_0 = 1.5126e-04
Loss = 3.5507e-03, PNorm = 183.9111, GNorm = 0.0995, lr_0 = 1.5116e-04
Loss = 6.8987e-03, PNorm = 183.9148, GNorm = 0.3602, lr_0 = 1.5106e-04
Loss = 7.8835e-03, PNorm = 183.9188, GNorm = 0.3062, lr_0 = 1.5095e-04
Loss = 2.7311e-03, PNorm = 183.9223, GNorm = 0.3154, lr_0 = 1.5085e-04
Validation mae = 0.121248
Epoch 25
Loss = 3.7188e-03, PNorm = 183.9253, GNorm = 0.0869, lr_0 = 1.5075e-04
Loss = 2.2518e-03, PNorm = 183.9283, GNorm = 0.1939, lr_0 = 1.5064e-04
Loss = 2.8330e-03, PNorm = 183.9312, GNorm = 0.2605, lr_0 = 1.5054e-04
Loss = 4.3086e-03, PNorm = 183.9335, GNorm = 0.1918, lr_0 = 1.5044e-04
Loss = 4.9774e-03, PNorm = 183.9356, GNorm = 0.3207, lr_0 = 1.5033e-04
Loss = 2.1598e-03, PNorm = 183.9371, GNorm = 0.1413, lr_0 = 1.5023e-04
Loss = 9.9639e-03, PNorm = 183.9398, GNorm = 2.5248, lr_0 = 1.5013e-04
Loss = 4.6415e-03, PNorm = 183.9425, GNorm = 0.1135, lr_0 = 1.5002e-04
Loss = 4.1276e-03, PNorm = 183.9430, GNorm = 0.1199, lr_0 = 1.4992e-04
Loss = 2.2290e-03, PNorm = 183.9448, GNorm = 0.1043, lr_0 = 1.4982e-04
Loss = 3.1891e-03, PNorm = 183.9468, GNorm = 0.1183, lr_0 = 1.4972e-04
Loss = 2.7204e-03, PNorm = 183.9483, GNorm = 0.0958, lr_0 = 1.4961e-04
Loss = 6.6781e-03, PNorm = 183.9492, GNorm = 0.1521, lr_0 = 1.4951e-04
Loss = 1.9160e-03, PNorm = 183.9516, GNorm = 0.2025, lr_0 = 1.4941e-04
Loss = 2.6743e-03, PNorm = 183.9525, GNorm = 0.0821, lr_0 = 1.4931e-04
Loss = 7.0238e-03, PNorm = 183.9544, GNorm = 0.1082, lr_0 = 1.4920e-04
Loss = 3.1635e-03, PNorm = 183.9562, GNorm = 0.0877, lr_0 = 1.4910e-04
Loss = 2.2501e-03, PNorm = 183.9571, GNorm = 0.1770, lr_0 = 1.4900e-04
Loss = 3.8116e-03, PNorm = 183.9592, GNorm = 0.0628, lr_0 = 1.4890e-04
Loss = 3.4520e-03, PNorm = 183.9611, GNorm = 0.1764, lr_0 = 1.4880e-04
Loss = 2.8272e-03, PNorm = 183.9625, GNorm = 0.1020, lr_0 = 1.4869e-04
Loss = 2.3856e-03, PNorm = 183.9646, GNorm = 0.1257, lr_0 = 1.4859e-04
Loss = 7.6768e-03, PNorm = 183.9670, GNorm = 0.0930, lr_0 = 1.4849e-04
Loss = 3.5002e-03, PNorm = 183.9687, GNorm = 0.2355, lr_0 = 1.4839e-04
Loss = 1.9633e-03, PNorm = 183.9711, GNorm = 0.0923, lr_0 = 1.4829e-04
Loss = 4.8019e-03, PNorm = 183.9717, GNorm = 0.1882, lr_0 = 1.4818e-04
Loss = 2.6791e-03, PNorm = 183.9737, GNorm = 0.0574, lr_0 = 1.4808e-04
Loss = 5.6682e-03, PNorm = 183.9768, GNorm = 0.0585, lr_0 = 1.4798e-04
Loss = 2.6977e-03, PNorm = 183.9792, GNorm = 0.0603, lr_0 = 1.4788e-04
Loss = 5.1349e-03, PNorm = 183.9829, GNorm = 1.2314, lr_0 = 1.4778e-04
Loss = 5.4797e-03, PNorm = 183.9868, GNorm = 0.1837, lr_0 = 1.4768e-04
Loss = 2.2345e-03, PNorm = 183.9919, GNorm = 0.1672, lr_0 = 1.4758e-04
Loss = 4.4798e-03, PNorm = 183.9928, GNorm = 0.0751, lr_0 = 1.4748e-04
Loss = 2.3321e-03, PNorm = 183.9947, GNorm = 0.0510, lr_0 = 1.4737e-04
Loss = 1.7137e-03, PNorm = 183.9980, GNorm = 0.0492, lr_0 = 1.4727e-04
Loss = 3.9342e-03, PNorm = 184.0010, GNorm = 0.0819, lr_0 = 1.4717e-04
Loss = 3.6000e-03, PNorm = 184.0021, GNorm = 0.0849, lr_0 = 1.4707e-04
Loss = 4.0252e-03, PNorm = 184.0027, GNorm = 0.0652, lr_0 = 1.4697e-04
Loss = 4.7660e-03, PNorm = 184.0045, GNorm = 0.0739, lr_0 = 1.4687e-04
Loss = 3.3392e-03, PNorm = 184.0071, GNorm = 0.1026, lr_0 = 1.4677e-04
Loss = 5.0154e-03, PNorm = 184.0095, GNorm = 0.0688, lr_0 = 1.4667e-04
Loss = 2.1195e-03, PNorm = 184.0108, GNorm = 0.1629, lr_0 = 1.4657e-04
Loss = 4.9494e-03, PNorm = 184.0118, GNorm = 0.7575, lr_0 = 1.4647e-04
Loss = 1.7938e-03, PNorm = 184.0130, GNorm = 0.1391, lr_0 = 1.4637e-04
Loss = 4.0114e-03, PNorm = 184.0155, GNorm = 0.1355, lr_0 = 1.4627e-04
Loss = 4.1021e-03, PNorm = 184.0176, GNorm = 0.0569, lr_0 = 1.4617e-04
Loss = 5.8772e-03, PNorm = 184.0210, GNorm = 0.1238, lr_0 = 1.4607e-04
Loss = 1.9752e-03, PNorm = 184.0231, GNorm = 0.0667, lr_0 = 1.4597e-04
Loss = 1.9077e-03, PNorm = 184.0252, GNorm = 0.1125, lr_0 = 1.4587e-04
Loss = 7.8648e-03, PNorm = 184.0276, GNorm = 0.0937, lr_0 = 1.4577e-04
Loss = 4.8839e-03, PNorm = 184.0302, GNorm = 0.6506, lr_0 = 1.4567e-04
Loss = 4.7693e-03, PNorm = 184.0338, GNorm = 0.3496, lr_0 = 1.4557e-04
Loss = 3.7825e-03, PNorm = 184.0357, GNorm = 0.1112, lr_0 = 1.4547e-04
Loss = 3.0843e-03, PNorm = 184.0378, GNorm = 0.1228, lr_0 = 1.4537e-04
Loss = 5.0007e-03, PNorm = 184.0404, GNorm = 0.1138, lr_0 = 1.4527e-04
Loss = 2.4756e-03, PNorm = 184.0425, GNorm = 0.0635, lr_0 = 1.4517e-04
Loss = 6.5000e-03, PNorm = 184.0442, GNorm = 0.3090, lr_0 = 1.4507e-04
Loss = 1.8344e-03, PNorm = 184.0469, GNorm = 0.1109, lr_0 = 1.4497e-04
Loss = 3.7975e-03, PNorm = 184.0479, GNorm = 0.1805, lr_0 = 1.4487e-04
Loss = 2.4453e-03, PNorm = 184.0490, GNorm = 0.2523, lr_0 = 1.4477e-04
Loss = 3.9465e-03, PNorm = 184.0504, GNorm = 0.0614, lr_0 = 1.4467e-04
Loss = 1.9482e-03, PNorm = 184.0525, GNorm = 0.0943, lr_0 = 1.4457e-04
Loss = 2.6228e-03, PNorm = 184.0551, GNorm = 0.3393, lr_0 = 1.4447e-04
Loss = 2.6550e-03, PNorm = 184.0577, GNorm = 0.1810, lr_0 = 1.4438e-04
Loss = 3.6620e-03, PNorm = 184.0606, GNorm = 0.1311, lr_0 = 1.4428e-04
Loss = 2.5745e-03, PNorm = 184.0643, GNorm = 0.1446, lr_0 = 1.4418e-04
Loss = 9.6898e-03, PNorm = 184.0664, GNorm = 0.2957, lr_0 = 1.4408e-04
Loss = 2.0792e-03, PNorm = 184.0693, GNorm = 0.1931, lr_0 = 1.4398e-04
Loss = 3.2909e-03, PNorm = 184.0715, GNorm = 0.2462, lr_0 = 1.4388e-04
Loss = 2.2390e-03, PNorm = 184.0740, GNorm = 0.1312, lr_0 = 1.4378e-04
Loss = 5.2817e-03, PNorm = 184.0766, GNorm = 0.0636, lr_0 = 1.4368e-04
Loss = 1.9109e-03, PNorm = 184.0788, GNorm = 0.1175, lr_0 = 1.4359e-04
Loss = 2.4517e-03, PNorm = 184.0805, GNorm = 0.1118, lr_0 = 1.4349e-04
Loss = 3.6325e-03, PNorm = 184.0834, GNorm = 0.0944, lr_0 = 1.4339e-04
Loss = 4.7906e-03, PNorm = 184.0867, GNorm = 0.1309, lr_0 = 1.4329e-04
Loss = 6.5126e-03, PNorm = 184.0899, GNorm = 0.1219, lr_0 = 1.4319e-04
Loss = 3.2383e-03, PNorm = 184.0927, GNorm = 0.3091, lr_0 = 1.4310e-04
Loss = 2.4916e-03, PNorm = 184.0956, GNorm = 0.1233, lr_0 = 1.4300e-04
Loss = 6.4949e-03, PNorm = 184.0977, GNorm = 0.1035, lr_0 = 1.4290e-04
Loss = 2.3020e-03, PNorm = 184.0991, GNorm = 0.0786, lr_0 = 1.4280e-04
Loss = 2.0910e-03, PNorm = 184.1008, GNorm = 0.0654, lr_0 = 1.4270e-04
Loss = 2.0440e-03, PNorm = 184.1029, GNorm = 0.1084, lr_0 = 1.4261e-04
Loss = 5.9538e-03, PNorm = 184.1046, GNorm = 0.3684, lr_0 = 1.4251e-04
Loss = 4.2338e-03, PNorm = 184.1079, GNorm = 0.1122, lr_0 = 1.4241e-04
Loss = 2.6002e-03, PNorm = 184.1111, GNorm = 0.0858, lr_0 = 1.4231e-04
Loss = 6.8251e-03, PNorm = 184.1149, GNorm = 0.2437, lr_0 = 1.4222e-04
Loss = 1.6545e-03, PNorm = 184.1177, GNorm = 0.0776, lr_0 = 1.4212e-04
Loss = 1.9728e-03, PNorm = 184.1209, GNorm = 0.0456, lr_0 = 1.4202e-04
Loss = 2.5645e-03, PNorm = 184.1238, GNorm = 0.1790, lr_0 = 1.4192e-04
Loss = 5.8826e-03, PNorm = 184.1268, GNorm = 0.0997, lr_0 = 1.4183e-04
Loss = 7.0760e-03, PNorm = 184.1284, GNorm = 0.1010, lr_0 = 1.4173e-04
Loss = 3.1460e-03, PNorm = 184.1300, GNorm = 0.0935, lr_0 = 1.4163e-04
Loss = 2.4204e-03, PNorm = 184.1327, GNorm = 0.1208, lr_0 = 1.4153e-04
Loss = 6.1285e-03, PNorm = 184.1352, GNorm = 0.1630, lr_0 = 1.4144e-04
Loss = 2.9460e-03, PNorm = 184.1363, GNorm = 0.1428, lr_0 = 1.4134e-04
Loss = 1.9263e-03, PNorm = 184.1383, GNorm = 0.0443, lr_0 = 1.4124e-04
Loss = 2.9359e-03, PNorm = 184.1412, GNorm = 0.1628, lr_0 = 1.4115e-04
Loss = 3.3656e-03, PNorm = 184.1438, GNorm = 0.1116, lr_0 = 1.4105e-04
Loss = 4.1735e-03, PNorm = 184.1478, GNorm = 0.2716, lr_0 = 1.4095e-04
Loss = 4.4960e-03, PNorm = 184.1506, GNorm = 0.0970, lr_0 = 1.4086e-04
Loss = 4.9978e-03, PNorm = 184.1534, GNorm = 0.1552, lr_0 = 1.4076e-04
Loss = 3.0438e-03, PNorm = 184.1558, GNorm = 0.1123, lr_0 = 1.4066e-04
Loss = 1.7837e-03, PNorm = 184.1585, GNorm = 0.1263, lr_0 = 1.4057e-04
Loss = 2.2899e-03, PNorm = 184.1608, GNorm = 0.1590, lr_0 = 1.4047e-04
Loss = 6.1785e-03, PNorm = 184.1633, GNorm = 0.1600, lr_0 = 1.4038e-04
Loss = 2.7720e-03, PNorm = 184.1667, GNorm = 0.1101, lr_0 = 1.4028e-04
Loss = 6.9730e-03, PNorm = 184.1691, GNorm = 0.2566, lr_0 = 1.4018e-04
Loss = 2.5857e-03, PNorm = 184.1709, GNorm = 0.1261, lr_0 = 1.4009e-04
Loss = 2.2357e-03, PNorm = 184.1755, GNorm = 0.1714, lr_0 = 1.3999e-04
Loss = 2.9411e-03, PNorm = 184.1784, GNorm = 0.5472, lr_0 = 1.3990e-04
Loss = 1.7081e-03, PNorm = 184.1814, GNorm = 0.0700, lr_0 = 1.3980e-04
Loss = 4.8746e-03, PNorm = 184.1842, GNorm = 0.1995, lr_0 = 1.3970e-04
Loss = 4.3386e-03, PNorm = 184.1853, GNorm = 0.0676, lr_0 = 1.3961e-04
Loss = 5.6080e-03, PNorm = 184.1862, GNorm = 0.1607, lr_0 = 1.3951e-04
Loss = 3.7449e-03, PNorm = 184.1880, GNorm = 0.0761, lr_0 = 1.3942e-04
Loss = 7.9891e-03, PNorm = 184.1890, GNorm = 0.2173, lr_0 = 1.3932e-04
Loss = 2.9754e-03, PNorm = 184.1902, GNorm = 0.0757, lr_0 = 1.3923e-04
Loss = 5.5299e-03, PNorm = 184.1915, GNorm = 0.0744, lr_0 = 1.3913e-04
Loss = 4.5293e-03, PNorm = 184.1932, GNorm = 0.1172, lr_0 = 1.3904e-04
Loss = 6.4486e-03, PNorm = 184.1948, GNorm = 0.1093, lr_0 = 1.3894e-04
Validation mae = 0.121248
Epoch 26
Loss = 6.5220e-03, PNorm = 184.1967, GNorm = 0.1143, lr_0 = 1.3884e-04
Loss = 2.8851e-03, PNorm = 184.1993, GNorm = 0.1706, lr_0 = 1.3875e-04
Loss = 3.4282e-03, PNorm = 184.2007, GNorm = 0.3575, lr_0 = 1.3865e-04
Loss = 3.8480e-03, PNorm = 184.2020, GNorm = 0.2015, lr_0 = 1.3856e-04
Loss = 4.4215e-03, PNorm = 184.2027, GNorm = 0.0825, lr_0 = 1.3846e-04
Loss = 1.9461e-03, PNorm = 184.2046, GNorm = 0.0855, lr_0 = 1.3837e-04
Loss = 3.0797e-03, PNorm = 184.2061, GNorm = 0.1007, lr_0 = 1.3828e-04
Loss = 6.2825e-03, PNorm = 184.2065, GNorm = 0.1760, lr_0 = 1.3818e-04
Loss = 7.7466e-03, PNorm = 184.2087, GNorm = 0.1374, lr_0 = 1.3809e-04
Loss = 3.0382e-03, PNorm = 184.2119, GNorm = 0.1157, lr_0 = 1.3799e-04
Loss = 2.1404e-03, PNorm = 184.2159, GNorm = 0.1028, lr_0 = 1.3790e-04
Loss = 1.6393e-03, PNorm = 184.2182, GNorm = 0.1199, lr_0 = 1.3780e-04
Loss = 3.4622e-03, PNorm = 184.2199, GNorm = 0.0508, lr_0 = 1.3771e-04
Loss = 2.0800e-03, PNorm = 184.2237, GNorm = 0.1475, lr_0 = 1.3761e-04
Loss = 3.1393e-03, PNorm = 184.2259, GNorm = 0.1553, lr_0 = 1.3752e-04
Loss = 1.4897e-03, PNorm = 184.2259, GNorm = 0.1301, lr_0 = 1.3742e-04
Loss = 1.5423e-03, PNorm = 184.2260, GNorm = 0.2287, lr_0 = 1.3733e-04
Loss = 1.9804e-03, PNorm = 184.2272, GNorm = 0.0706, lr_0 = 1.3724e-04
Loss = 1.9121e-03, PNorm = 184.2295, GNorm = 0.0428, lr_0 = 1.3714e-04
Loss = 6.9612e-03, PNorm = 184.2305, GNorm = 0.0917, lr_0 = 1.3705e-04
Loss = 4.5927e-03, PNorm = 184.2325, GNorm = 0.0550, lr_0 = 1.3695e-04
Loss = 2.5741e-03, PNorm = 184.2351, GNorm = 0.1067, lr_0 = 1.3686e-04
Loss = 1.8522e-03, PNorm = 184.2361, GNorm = 0.1638, lr_0 = 1.3677e-04
Loss = 2.5493e-03, PNorm = 184.2394, GNorm = 0.0520, lr_0 = 1.3667e-04
Loss = 2.2157e-03, PNorm = 184.2424, GNorm = 0.6091, lr_0 = 1.3658e-04
Loss = 4.2742e-03, PNorm = 184.2447, GNorm = 0.3073, lr_0 = 1.3649e-04
Loss = 2.9947e-03, PNorm = 184.2481, GNorm = 0.0603, lr_0 = 1.3639e-04
Loss = 6.1452e-03, PNorm = 184.2497, GNorm = 0.2122, lr_0 = 1.3630e-04
Loss = 1.5479e-03, PNorm = 184.2514, GNorm = 0.0410, lr_0 = 1.3621e-04
Loss = 5.8774e-03, PNorm = 184.2535, GNorm = 0.1523, lr_0 = 1.3611e-04
Loss = 2.1303e-03, PNorm = 184.2550, GNorm = 0.0824, lr_0 = 1.3602e-04
Loss = 2.9045e-03, PNorm = 184.2581, GNorm = 0.0681, lr_0 = 1.3593e-04
Loss = 8.7889e-03, PNorm = 184.2612, GNorm = 0.1540, lr_0 = 1.3583e-04
Loss = 1.7296e-03, PNorm = 184.2621, GNorm = 0.1442, lr_0 = 1.3574e-04
Loss = 1.7370e-03, PNorm = 184.2646, GNorm = 0.0980, lr_0 = 1.3565e-04
Loss = 1.3803e-03, PNorm = 184.2669, GNorm = 0.0729, lr_0 = 1.3555e-04
Loss = 3.5211e-03, PNorm = 184.2697, GNorm = 0.1555, lr_0 = 1.3546e-04
Loss = 6.1158e-03, PNorm = 184.2730, GNorm = 0.9375, lr_0 = 1.3537e-04
Loss = 1.5566e-03, PNorm = 184.2746, GNorm = 0.1030, lr_0 = 1.3528e-04
Loss = 1.5121e-02, PNorm = 184.2774, GNorm = 0.3385, lr_0 = 1.3518e-04
Loss = 3.8930e-03, PNorm = 184.2777, GNorm = 0.1055, lr_0 = 1.3509e-04
Loss = 4.3544e-03, PNorm = 184.2790, GNorm = 0.0543, lr_0 = 1.3500e-04
Loss = 1.5320e-03, PNorm = 184.2824, GNorm = 0.1354, lr_0 = 1.3491e-04
Loss = 2.2401e-03, PNorm = 184.2844, GNorm = 0.1141, lr_0 = 1.3481e-04
Loss = 5.3600e-03, PNorm = 184.2854, GNorm = 0.1044, lr_0 = 1.3472e-04
Loss = 1.8218e-03, PNorm = 184.2874, GNorm = 0.1078, lr_0 = 1.3463e-04
Loss = 5.1095e-03, PNorm = 184.2896, GNorm = 0.0461, lr_0 = 1.3454e-04
Loss = 5.3469e-03, PNorm = 184.2916, GNorm = 0.1949, lr_0 = 1.3444e-04
Loss = 1.8331e-03, PNorm = 184.2945, GNorm = 0.1496, lr_0 = 1.3435e-04
Loss = 4.3257e-03, PNorm = 184.2976, GNorm = 0.0842, lr_0 = 1.3426e-04
Loss = 1.1706e-02, PNorm = 184.2990, GNorm = 0.2772, lr_0 = 1.3417e-04
Loss = 2.6067e-03, PNorm = 184.3010, GNorm = 0.3263, lr_0 = 1.3408e-04
Loss = 3.3606e-03, PNorm = 184.3039, GNorm = 0.0811, lr_0 = 1.3398e-04
Loss = 3.3470e-03, PNorm = 184.3051, GNorm = 0.0518, lr_0 = 1.3389e-04
Loss = 2.9064e-03, PNorm = 184.3073, GNorm = 0.0876, lr_0 = 1.3380e-04
Loss = 1.6937e-03, PNorm = 184.3094, GNorm = 0.0780, lr_0 = 1.3371e-04
Loss = 2.0784e-03, PNorm = 184.3127, GNorm = 0.0510, lr_0 = 1.3362e-04
Loss = 1.4086e-03, PNorm = 184.3146, GNorm = 0.0639, lr_0 = 1.3353e-04
Loss = 2.5388e-03, PNorm = 184.3163, GNorm = 0.0544, lr_0 = 1.3343e-04
Loss = 1.7989e-03, PNorm = 184.3177, GNorm = 0.0710, lr_0 = 1.3334e-04
Loss = 3.7069e-03, PNorm = 184.3192, GNorm = 0.1253, lr_0 = 1.3325e-04
Loss = 3.0255e-03, PNorm = 184.3217, GNorm = 0.0685, lr_0 = 1.3316e-04
Loss = 4.0952e-03, PNorm = 184.3244, GNorm = 0.2304, lr_0 = 1.3307e-04
Loss = 1.7861e-03, PNorm = 184.3271, GNorm = 0.0662, lr_0 = 1.3298e-04
Loss = 4.1171e-03, PNorm = 184.3285, GNorm = 0.0905, lr_0 = 1.3289e-04
Loss = 5.6605e-03, PNorm = 184.3295, GNorm = 0.1516, lr_0 = 1.3280e-04
Loss = 2.5503e-03, PNorm = 184.3305, GNorm = 0.1271, lr_0 = 1.3270e-04
Loss = 2.0080e-03, PNorm = 184.3311, GNorm = 0.0544, lr_0 = 1.3261e-04
Loss = 7.7985e-03, PNorm = 184.3321, GNorm = 0.1450, lr_0 = 1.3252e-04
Loss = 5.3556e-03, PNorm = 184.3343, GNorm = 0.1178, lr_0 = 1.3243e-04
Loss = 1.5853e-03, PNorm = 184.3357, GNorm = 0.0753, lr_0 = 1.3234e-04
Loss = 1.9727e-03, PNorm = 184.3367, GNorm = 0.0749, lr_0 = 1.3225e-04
Loss = 3.3277e-03, PNorm = 184.3389, GNorm = 0.0926, lr_0 = 1.3216e-04
Loss = 4.4277e-03, PNorm = 184.3402, GNorm = 0.1046, lr_0 = 1.3207e-04
Loss = 1.7262e-03, PNorm = 184.3423, GNorm = 0.0537, lr_0 = 1.3198e-04
Loss = 2.3801e-03, PNorm = 184.3444, GNorm = 0.0870, lr_0 = 1.3189e-04
Loss = 4.5994e-03, PNorm = 184.3467, GNorm = 0.2552, lr_0 = 1.3180e-04
Loss = 3.6035e-03, PNorm = 184.3493, GNorm = 0.0797, lr_0 = 1.3171e-04
Loss = 4.3910e-03, PNorm = 184.3516, GNorm = 0.0624, lr_0 = 1.3162e-04
Loss = 9.2342e-03, PNorm = 184.3523, GNorm = 0.9003, lr_0 = 1.3153e-04
Loss = 5.4983e-03, PNorm = 184.3528, GNorm = 0.0920, lr_0 = 1.3144e-04
Loss = 2.9334e-03, PNorm = 184.3560, GNorm = 0.1754, lr_0 = 1.3135e-04
Loss = 2.3140e-03, PNorm = 184.3579, GNorm = 0.0559, lr_0 = 1.3126e-04
Loss = 1.7369e-03, PNorm = 184.3609, GNorm = 0.1267, lr_0 = 1.3117e-04
Loss = 2.1258e-03, PNorm = 184.3623, GNorm = 0.3767, lr_0 = 1.3108e-04
Loss = 5.4620e-03, PNorm = 184.3630, GNorm = 0.4781, lr_0 = 1.3099e-04
Loss = 6.4564e-03, PNorm = 184.3645, GNorm = 0.1054, lr_0 = 1.3090e-04
Loss = 3.5327e-03, PNorm = 184.3670, GNorm = 0.0939, lr_0 = 1.3081e-04
Loss = 4.0651e-03, PNorm = 184.3687, GNorm = 0.3272, lr_0 = 1.3072e-04
Loss = 3.0979e-03, PNorm = 184.3702, GNorm = 0.1483, lr_0 = 1.3063e-04
Loss = 1.5153e-03, PNorm = 184.3719, GNorm = 0.0850, lr_0 = 1.3054e-04
Loss = 4.6324e-03, PNorm = 184.3738, GNorm = 0.0906, lr_0 = 1.3045e-04
Loss = 2.6818e-03, PNorm = 184.3768, GNorm = 0.1524, lr_0 = 1.3036e-04
Loss = 1.5708e-03, PNorm = 184.3791, GNorm = 0.0550, lr_0 = 1.3027e-04
Loss = 2.3667e-03, PNorm = 184.3810, GNorm = 0.1143, lr_0 = 1.3018e-04
Loss = 1.5531e-03, PNorm = 184.3821, GNorm = 0.0823, lr_0 = 1.3009e-04
Loss = 1.8895e-03, PNorm = 184.3829, GNorm = 0.0790, lr_0 = 1.3000e-04
Loss = 1.8765e-03, PNorm = 184.3840, GNorm = 0.2140, lr_0 = 1.2992e-04
Loss = 5.4435e-03, PNorm = 184.3859, GNorm = 0.0808, lr_0 = 1.2983e-04
Loss = 2.9925e-03, PNorm = 184.3875, GNorm = 0.2259, lr_0 = 1.2974e-04
Loss = 7.5168e-03, PNorm = 184.3895, GNorm = 0.1633, lr_0 = 1.2965e-04
Loss = 2.6934e-03, PNorm = 184.3924, GNorm = 0.1165, lr_0 = 1.2956e-04
Loss = 1.6731e-03, PNorm = 184.3949, GNorm = 0.0487, lr_0 = 1.2947e-04
Loss = 4.2610e-03, PNorm = 184.3964, GNorm = 0.1329, lr_0 = 1.2938e-04
Loss = 2.8271e-03, PNorm = 184.3975, GNorm = 0.1662, lr_0 = 1.2929e-04
Loss = 2.0349e-03, PNorm = 184.3987, GNorm = 0.2247, lr_0 = 1.2921e-04
Loss = 1.6138e-03, PNorm = 184.4002, GNorm = 0.0464, lr_0 = 1.2912e-04
Loss = 4.6463e-03, PNorm = 184.4013, GNorm = 0.3109, lr_0 = 1.2903e-04
Loss = 3.5823e-03, PNorm = 184.4029, GNorm = 0.2641, lr_0 = 1.2894e-04
Loss = 1.6493e-03, PNorm = 184.4043, GNorm = 0.0418, lr_0 = 1.2885e-04
Loss = 3.8093e-03, PNorm = 184.4058, GNorm = 0.2531, lr_0 = 1.2876e-04
Loss = 2.4652e-03, PNorm = 184.4079, GNorm = 0.0837, lr_0 = 1.2867e-04
Loss = 5.6676e-03, PNorm = 184.4093, GNorm = 0.0781, lr_0 = 1.2859e-04
Loss = 1.9980e-03, PNorm = 184.4112, GNorm = 0.0800, lr_0 = 1.2850e-04
Loss = 4.6041e-03, PNorm = 184.4133, GNorm = 0.1567, lr_0 = 1.2841e-04
Loss = 3.0369e-03, PNorm = 184.4176, GNorm = 0.2089, lr_0 = 1.2832e-04
Loss = 3.6406e-03, PNorm = 184.4212, GNorm = 0.5383, lr_0 = 1.2823e-04
Loss = 3.0166e-03, PNorm = 184.4247, GNorm = 0.1985, lr_0 = 1.2815e-04
Loss = 3.8759e-03, PNorm = 184.4264, GNorm = 0.0521, lr_0 = 1.2806e-04
Loss = 2.8233e-03, PNorm = 184.4295, GNorm = 0.0840, lr_0 = 1.2797e-04
Validation mae = 0.121205
Epoch 27
Loss = 2.5910e-03, PNorm = 184.4313, GNorm = 0.0618, lr_0 = 1.2788e-04
Loss = 1.3166e-03, PNorm = 184.4321, GNorm = 0.0915, lr_0 = 1.2780e-04
Loss = 1.7009e-03, PNorm = 184.4318, GNorm = 0.1729, lr_0 = 1.2771e-04
Loss = 2.5718e-03, PNorm = 184.4318, GNorm = 0.2353, lr_0 = 1.2762e-04
Loss = 1.2758e-03, PNorm = 184.4332, GNorm = 0.1008, lr_0 = 1.2753e-04
Loss = 2.2287e-03, PNorm = 184.4354, GNorm = 0.0914, lr_0 = 1.2745e-04
Loss = 3.6239e-03, PNorm = 184.4376, GNorm = 0.0733, lr_0 = 1.2736e-04
Loss = 1.3825e-03, PNorm = 184.4386, GNorm = 0.0476, lr_0 = 1.2727e-04
Loss = 2.2420e-03, PNorm = 184.4399, GNorm = 0.1097, lr_0 = 1.2718e-04
Loss = 1.8182e-03, PNorm = 184.4422, GNorm = 0.1623, lr_0 = 1.2710e-04
Loss = 2.7123e-03, PNorm = 184.4445, GNorm = 0.0742, lr_0 = 1.2701e-04
Loss = 2.4259e-03, PNorm = 184.4476, GNorm = 0.0671, lr_0 = 1.2692e-04
Loss = 2.1010e-03, PNorm = 184.4485, GNorm = 0.1394, lr_0 = 1.2684e-04
Loss = 2.4612e-03, PNorm = 184.4490, GNorm = 0.0544, lr_0 = 1.2675e-04
Loss = 6.0362e-03, PNorm = 184.4501, GNorm = 0.1307, lr_0 = 1.2666e-04
Loss = 1.8822e-03, PNorm = 184.4516, GNorm = 0.1554, lr_0 = 1.2658e-04
Loss = 4.8900e-03, PNorm = 184.4530, GNorm = 0.1226, lr_0 = 1.2649e-04
Loss = 2.3747e-03, PNorm = 184.4547, GNorm = 0.0918, lr_0 = 1.2640e-04
Loss = 4.1838e-03, PNorm = 184.4563, GNorm = 0.2817, lr_0 = 1.2632e-04
Loss = 1.6955e-03, PNorm = 184.4588, GNorm = 0.1078, lr_0 = 1.2623e-04
Loss = 4.9853e-03, PNorm = 184.4606, GNorm = 0.2423, lr_0 = 1.2614e-04
Loss = 2.7176e-03, PNorm = 184.4603, GNorm = 0.0976, lr_0 = 1.2606e-04
Loss = 2.8462e-03, PNorm = 184.4613, GNorm = 0.0820, lr_0 = 1.2597e-04
Loss = 3.1517e-03, PNorm = 184.4642, GNorm = 0.0779, lr_0 = 1.2588e-04
Loss = 1.2459e-03, PNorm = 184.4664, GNorm = 0.0782, lr_0 = 1.2580e-04
Loss = 1.0629e-03, PNorm = 184.4681, GNorm = 0.0352, lr_0 = 1.2571e-04
Loss = 1.6046e-03, PNorm = 184.4684, GNorm = 0.1090, lr_0 = 1.2563e-04
Loss = 1.2834e-03, PNorm = 184.4694, GNorm = 0.0954, lr_0 = 1.2554e-04
Loss = 5.9118e-03, PNorm = 184.4709, GNorm = 0.0948, lr_0 = 1.2545e-04
Loss = 3.4681e-03, PNorm = 184.4729, GNorm = 0.2928, lr_0 = 1.2537e-04
Loss = 4.9023e-03, PNorm = 184.4744, GNorm = 0.0493, lr_0 = 1.2528e-04
Loss = 3.0931e-03, PNorm = 184.4755, GNorm = 0.0919, lr_0 = 1.2520e-04
Loss = 3.4575e-03, PNorm = 184.4787, GNorm = 0.1004, lr_0 = 1.2511e-04
Loss = 2.1522e-03, PNorm = 184.4818, GNorm = 0.1098, lr_0 = 1.2502e-04
Loss = 2.8636e-03, PNorm = 184.4834, GNorm = 0.0742, lr_0 = 1.2494e-04
Loss = 2.1725e-03, PNorm = 184.4840, GNorm = 0.1272, lr_0 = 1.2485e-04
Loss = 3.5925e-03, PNorm = 184.4861, GNorm = 0.1422, lr_0 = 1.2477e-04
Loss = 4.5695e-03, PNorm = 184.4860, GNorm = 0.1079, lr_0 = 1.2468e-04
Loss = 4.8108e-03, PNorm = 184.4878, GNorm = 0.1715, lr_0 = 1.2460e-04
Loss = 8.4655e-03, PNorm = 184.4900, GNorm = 0.0483, lr_0 = 1.2451e-04
Loss = 1.6143e-03, PNorm = 184.4913, GNorm = 0.1370, lr_0 = 1.2443e-04
Loss = 2.9521e-03, PNorm = 184.4934, GNorm = 0.1047, lr_0 = 1.2434e-04
Loss = 2.0643e-03, PNorm = 184.4945, GNorm = 0.0784, lr_0 = 1.2426e-04
Loss = 1.6598e-03, PNorm = 184.4955, GNorm = 0.0961, lr_0 = 1.2417e-04
Loss = 3.4283e-03, PNorm = 184.4976, GNorm = 0.0860, lr_0 = 1.2409e-04
Loss = 3.3481e-03, PNorm = 184.5002, GNorm = 0.0946, lr_0 = 1.2400e-04
Loss = 3.7724e-03, PNorm = 184.5011, GNorm = 0.1523, lr_0 = 1.2392e-04
Loss = 1.8154e-03, PNorm = 184.5026, GNorm = 0.1398, lr_0 = 1.2383e-04
Loss = 1.0013e-02, PNorm = 184.5035, GNorm = 0.0685, lr_0 = 1.2375e-04
Loss = 1.3243e-03, PNorm = 184.5053, GNorm = 0.0500, lr_0 = 1.2366e-04
Loss = 5.2165e-03, PNorm = 184.5072, GNorm = 0.0939, lr_0 = 1.2358e-04
Loss = 1.2140e-02, PNorm = 184.5096, GNorm = 0.4476, lr_0 = 1.2349e-04
Loss = 3.8249e-03, PNorm = 184.5087, GNorm = 0.0940, lr_0 = 1.2341e-04
Loss = 4.1337e-03, PNorm = 184.5092, GNorm = 0.2747, lr_0 = 1.2332e-04
Loss = 1.6249e-03, PNorm = 184.5105, GNorm = 0.1071, lr_0 = 1.2324e-04
Loss = 4.7068e-03, PNorm = 184.5104, GNorm = 0.1117, lr_0 = 1.2315e-04
Loss = 1.9123e-03, PNorm = 184.5108, GNorm = 0.1934, lr_0 = 1.2307e-04
Loss = 2.3790e-03, PNorm = 184.5131, GNorm = 0.0655, lr_0 = 1.2298e-04
Loss = 2.4258e-03, PNorm = 184.5132, GNorm = 0.0966, lr_0 = 1.2290e-04
Loss = 2.7412e-03, PNorm = 184.5137, GNorm = 0.2016, lr_0 = 1.2282e-04
Loss = 3.2573e-03, PNorm = 184.5149, GNorm = 0.0899, lr_0 = 1.2273e-04
Loss = 1.7389e-03, PNorm = 184.5165, GNorm = 0.0581, lr_0 = 1.2265e-04
Loss = 2.8268e-03, PNorm = 184.5179, GNorm = 0.1095, lr_0 = 1.2256e-04
Loss = 3.0018e-03, PNorm = 184.5213, GNorm = 0.1161, lr_0 = 1.2248e-04
Loss = 4.9337e-03, PNorm = 184.5240, GNorm = 0.3378, lr_0 = 1.2240e-04
Loss = 5.8813e-03, PNorm = 184.5250, GNorm = 0.0950, lr_0 = 1.2231e-04
Loss = 1.7511e-03, PNorm = 184.5264, GNorm = 0.1759, lr_0 = 1.2223e-04
Loss = 8.3940e-03, PNorm = 184.5272, GNorm = 0.1029, lr_0 = 1.2214e-04
Loss = 5.0545e-03, PNorm = 184.5301, GNorm = 0.2196, lr_0 = 1.2206e-04
Loss = 2.9931e-03, PNorm = 184.5317, GNorm = 0.1270, lr_0 = 1.2198e-04
Loss = 3.0322e-03, PNorm = 184.5329, GNorm = 0.1869, lr_0 = 1.2189e-04
Loss = 2.8719e-03, PNorm = 184.5361, GNorm = 0.0401, lr_0 = 1.2181e-04
Loss = 2.3567e-03, PNorm = 184.5382, GNorm = 0.1765, lr_0 = 1.2173e-04
Loss = 2.0071e-03, PNorm = 184.5390, GNorm = 0.1096, lr_0 = 1.2164e-04
Loss = 2.4447e-03, PNorm = 184.5409, GNorm = 0.0710, lr_0 = 1.2156e-04
Loss = 2.3527e-03, PNorm = 184.5425, GNorm = 0.0909, lr_0 = 1.2148e-04
Loss = 2.8219e-03, PNorm = 184.5437, GNorm = 0.1260, lr_0 = 1.2139e-04
Loss = 6.7889e-03, PNorm = 184.5435, GNorm = 0.1564, lr_0 = 1.2131e-04
Loss = 5.0108e-03, PNorm = 184.5438, GNorm = 0.1686, lr_0 = 1.2123e-04
Loss = 2.4116e-03, PNorm = 184.5452, GNorm = 0.0906, lr_0 = 1.2114e-04
Loss = 2.6283e-03, PNorm = 184.5476, GNorm = 0.1323, lr_0 = 1.2106e-04
Loss = 1.8782e-03, PNorm = 184.5490, GNorm = 0.1064, lr_0 = 1.2098e-04
Loss = 2.5019e-03, PNorm = 184.5515, GNorm = 0.1392, lr_0 = 1.2090e-04
Loss = 5.9666e-03, PNorm = 184.5545, GNorm = 0.0468, lr_0 = 1.2081e-04
Loss = 4.1514e-03, PNorm = 184.5562, GNorm = 0.1232, lr_0 = 1.2073e-04
Loss = 4.6607e-03, PNorm = 184.5590, GNorm = 0.1156, lr_0 = 1.2065e-04
Loss = 3.2109e-03, PNorm = 184.5611, GNorm = 0.0592, lr_0 = 1.2056e-04
Loss = 7.2454e-03, PNorm = 184.5632, GNorm = 1.9300, lr_0 = 1.2048e-04
Loss = 4.0485e-03, PNorm = 184.5637, GNorm = 0.2454, lr_0 = 1.2040e-04
Loss = 3.2161e-03, PNorm = 184.5659, GNorm = 0.1291, lr_0 = 1.2032e-04
Loss = 3.7097e-03, PNorm = 184.5679, GNorm = 0.1321, lr_0 = 1.2023e-04
Loss = 3.5661e-03, PNorm = 184.5714, GNorm = 0.1173, lr_0 = 1.2015e-04
Loss = 1.2864e-03, PNorm = 184.5740, GNorm = 0.1062, lr_0 = 1.2007e-04
Loss = 3.5273e-03, PNorm = 184.5761, GNorm = 0.1612, lr_0 = 1.1999e-04
Loss = 5.1725e-03, PNorm = 184.5787, GNorm = 0.1302, lr_0 = 1.1991e-04
Loss = 1.2522e-03, PNorm = 184.5795, GNorm = 0.0913, lr_0 = 1.1982e-04
Loss = 1.7121e-03, PNorm = 184.5799, GNorm = 0.1629, lr_0 = 1.1974e-04
Loss = 4.1365e-03, PNorm = 184.5812, GNorm = 0.1321, lr_0 = 1.1966e-04
Loss = 3.5300e-03, PNorm = 184.5830, GNorm = 0.1269, lr_0 = 1.1958e-04
Loss = 1.2246e-03, PNorm = 184.5854, GNorm = 0.0475, lr_0 = 1.1950e-04
Loss = 2.0237e-03, PNorm = 184.5874, GNorm = 0.0467, lr_0 = 1.1941e-04
Loss = 2.1826e-03, PNorm = 184.5889, GNorm = 0.0668, lr_0 = 1.1933e-04
Loss = 2.2334e-03, PNorm = 184.5899, GNorm = 0.1475, lr_0 = 1.1925e-04
Loss = 2.6848e-03, PNorm = 184.5900, GNorm = 0.0901, lr_0 = 1.1917e-04
Loss = 3.5581e-03, PNorm = 184.5921, GNorm = 0.0349, lr_0 = 1.1909e-04
Loss = 1.8108e-03, PNorm = 184.5934, GNorm = 0.1605, lr_0 = 1.1901e-04
Loss = 3.4743e-03, PNorm = 184.5949, GNorm = 0.0864, lr_0 = 1.1892e-04
Loss = 4.7666e-03, PNorm = 184.5963, GNorm = 0.1722, lr_0 = 1.1884e-04
Loss = 2.0558e-03, PNorm = 184.5992, GNorm = 0.1113, lr_0 = 1.1876e-04
Loss = 2.1217e-03, PNorm = 184.6011, GNorm = 0.0878, lr_0 = 1.1868e-04
Loss = 2.8970e-03, PNorm = 184.6022, GNorm = 0.0605, lr_0 = 1.1860e-04
Loss = 2.6833e-03, PNorm = 184.6036, GNorm = 0.0428, lr_0 = 1.1852e-04
Loss = 1.5745e-03, PNorm = 184.6044, GNorm = 0.0885, lr_0 = 1.1844e-04
Loss = 6.4910e-03, PNorm = 184.6054, GNorm = 0.5447, lr_0 = 1.1835e-04
Loss = 1.8008e-03, PNorm = 184.6071, GNorm = 0.1001, lr_0 = 1.1827e-04
Loss = 3.4462e-03, PNorm = 184.6083, GNorm = 0.0910, lr_0 = 1.1819e-04
Loss = 2.4764e-03, PNorm = 184.6095, GNorm = 0.0820, lr_0 = 1.1811e-04
Loss = 3.0361e-03, PNorm = 184.6111, GNorm = 0.1045, lr_0 = 1.1803e-04
Loss = 7.1238e-03, PNorm = 184.6127, GNorm = 0.0499, lr_0 = 1.1795e-04
Loss = 3.9003e-03, PNorm = 184.6150, GNorm = 0.0625, lr_0 = 1.1787e-04
Validation mae = 0.121300
Epoch 28
Loss = 2.8078e-03, PNorm = 184.6158, GNorm = 0.3161, lr_0 = 1.1779e-04
Loss = 1.5023e-03, PNorm = 184.6171, GNorm = 0.2473, lr_0 = 1.1771e-04
Loss = 5.3752e-03, PNorm = 184.6198, GNorm = 0.0731, lr_0 = 1.1763e-04
Loss = 2.3346e-03, PNorm = 184.6207, GNorm = 0.2949, lr_0 = 1.1755e-04
Loss = 3.6528e-03, PNorm = 184.6214, GNorm = 0.1012, lr_0 = 1.1747e-04
Loss = 2.2649e-03, PNorm = 184.6238, GNorm = 0.1244, lr_0 = 1.1739e-04
Loss = 1.7590e-03, PNorm = 184.6259, GNorm = 0.0733, lr_0 = 1.1730e-04
Loss = 4.0498e-03, PNorm = 184.6266, GNorm = 0.1261, lr_0 = 1.1722e-04
Loss = 3.5835e-03, PNorm = 184.6285, GNorm = 0.1201, lr_0 = 1.1714e-04
Loss = 3.2980e-03, PNorm = 184.6307, GNorm = 0.1631, lr_0 = 1.1706e-04
Loss = 3.7169e-03, PNorm = 184.6313, GNorm = 0.2177, lr_0 = 1.1698e-04
Loss = 1.8418e-03, PNorm = 184.6331, GNorm = 0.1471, lr_0 = 1.1690e-04
Loss = 2.1692e-03, PNorm = 184.6343, GNorm = 0.0476, lr_0 = 1.1682e-04
Loss = 4.2743e-03, PNorm = 184.6366, GNorm = 0.0948, lr_0 = 1.1674e-04
Loss = 2.3010e-03, PNorm = 184.6385, GNorm = 0.0631, lr_0 = 1.1666e-04
Loss = 1.6063e-03, PNorm = 184.6391, GNorm = 0.0541, lr_0 = 1.1658e-04
Loss = 1.5421e-03, PNorm = 184.6400, GNorm = 0.0830, lr_0 = 1.1650e-04
Loss = 4.9616e-03, PNorm = 184.6412, GNorm = 0.0465, lr_0 = 1.1642e-04
Loss = 1.5047e-03, PNorm = 184.6421, GNorm = 0.0788, lr_0 = 1.1634e-04
Loss = 2.0822e-03, PNorm = 184.6431, GNorm = 0.1039, lr_0 = 1.1626e-04
Loss = 2.7724e-03, PNorm = 184.6446, GNorm = 0.1756, lr_0 = 1.1618e-04
Loss = 1.8840e-03, PNorm = 184.6455, GNorm = 0.1499, lr_0 = 1.1611e-04
Loss = 1.0698e-03, PNorm = 184.6468, GNorm = 0.1241, lr_0 = 1.1603e-04
Loss = 9.5222e-04, PNorm = 184.6489, GNorm = 0.0478, lr_0 = 1.1595e-04
Loss = 8.5182e-03, PNorm = 184.6507, GNorm = 0.0501, lr_0 = 1.1587e-04
Loss = 1.4822e-03, PNorm = 184.6524, GNorm = 0.0763, lr_0 = 1.1579e-04
Loss = 3.7687e-03, PNorm = 184.6538, GNorm = 0.2438, lr_0 = 1.1571e-04
Loss = 3.3184e-03, PNorm = 184.6553, GNorm = 0.1121, lr_0 = 1.1563e-04
Loss = 1.1225e-03, PNorm = 184.6560, GNorm = 0.1191, lr_0 = 1.1555e-04
Loss = 2.5610e-03, PNorm = 184.6567, GNorm = 0.1435, lr_0 = 1.1547e-04
Loss = 4.7704e-03, PNorm = 184.6581, GNorm = 0.1476, lr_0 = 1.1539e-04
Loss = 5.3462e-03, PNorm = 184.6595, GNorm = 0.0807, lr_0 = 1.1531e-04
Loss = 3.3534e-03, PNorm = 184.6603, GNorm = 0.1121, lr_0 = 1.1523e-04
Loss = 1.3221e-03, PNorm = 184.6617, GNorm = 0.0629, lr_0 = 1.1515e-04
Loss = 1.8847e-03, PNorm = 184.6619, GNorm = 0.0765, lr_0 = 1.1508e-04
Loss = 2.8258e-03, PNorm = 184.6615, GNorm = 0.1153, lr_0 = 1.1500e-04
Loss = 1.7418e-03, PNorm = 184.6624, GNorm = 0.1750, lr_0 = 1.1492e-04
Loss = 1.8196e-02, PNorm = 184.6638, GNorm = 0.2107, lr_0 = 1.1484e-04
Loss = 2.9562e-03, PNorm = 184.6646, GNorm = 0.2019, lr_0 = 1.1476e-04
Loss = 2.9916e-03, PNorm = 184.6656, GNorm = 0.1159, lr_0 = 1.1468e-04
Loss = 1.4217e-03, PNorm = 184.6666, GNorm = 0.0532, lr_0 = 1.1460e-04
Loss = 1.5108e-03, PNorm = 184.6681, GNorm = 0.1400, lr_0 = 1.1452e-04
Loss = 9.5653e-04, PNorm = 184.6701, GNorm = 0.1301, lr_0 = 1.1445e-04
Loss = 6.6316e-03, PNorm = 184.6720, GNorm = 0.0627, lr_0 = 1.1437e-04
Loss = 2.6579e-03, PNorm = 184.6732, GNorm = 0.0961, lr_0 = 1.1429e-04
Loss = 6.1243e-03, PNorm = 184.6753, GNorm = 0.0547, lr_0 = 1.1421e-04
Loss = 6.7602e-03, PNorm = 184.6774, GNorm = 0.0725, lr_0 = 1.1413e-04
Loss = 3.9754e-03, PNorm = 184.6792, GNorm = 0.0483, lr_0 = 1.1405e-04
Loss = 1.5432e-03, PNorm = 184.6805, GNorm = 0.2953, lr_0 = 1.1398e-04
Loss = 2.0222e-03, PNorm = 184.6809, GNorm = 0.0554, lr_0 = 1.1390e-04
Loss = 1.2389e-03, PNorm = 184.6806, GNorm = 0.0548, lr_0 = 1.1382e-04
Loss = 5.3764e-03, PNorm = 184.6810, GNorm = 0.0847, lr_0 = 1.1374e-04
Loss = 1.6180e-03, PNorm = 184.6817, GNorm = 0.0523, lr_0 = 1.1366e-04
Loss = 6.1193e-03, PNorm = 184.6828, GNorm = 0.3021, lr_0 = 1.1359e-04
Loss = 5.7994e-03, PNorm = 184.6848, GNorm = 0.0966, lr_0 = 1.1351e-04
Loss = 2.5343e-03, PNorm = 184.6864, GNorm = 0.1259, lr_0 = 1.1343e-04
Loss = 1.3137e-03, PNorm = 184.6879, GNorm = 0.0965, lr_0 = 1.1335e-04
Loss = 1.0111e-03, PNorm = 184.6883, GNorm = 0.1211, lr_0 = 1.1328e-04
Loss = 1.9350e-03, PNorm = 184.6896, GNorm = 0.0292, lr_0 = 1.1320e-04
Loss = 1.0297e-03, PNorm = 184.6906, GNorm = 0.0953, lr_0 = 1.1312e-04
Loss = 4.7040e-03, PNorm = 184.6921, GNorm = 0.0378, lr_0 = 1.1304e-04
Loss = 1.8895e-03, PNorm = 184.6930, GNorm = 0.1873, lr_0 = 1.1297e-04
Loss = 1.4334e-03, PNorm = 184.6930, GNorm = 0.1301, lr_0 = 1.1289e-04
Loss = 1.4765e-03, PNorm = 184.6933, GNorm = 0.0990, lr_0 = 1.1281e-04
Loss = 1.9406e-03, PNorm = 184.6946, GNorm = 0.0909, lr_0 = 1.1273e-04
Loss = 1.2690e-03, PNorm = 184.6963, GNorm = 0.0500, lr_0 = 1.1266e-04
Loss = 1.7620e-03, PNorm = 184.6975, GNorm = 0.0382, lr_0 = 1.1258e-04
Loss = 1.3462e-03, PNorm = 184.6983, GNorm = 0.0719, lr_0 = 1.1250e-04
Loss = 1.7292e-03, PNorm = 184.6990, GNorm = 0.1180, lr_0 = 1.1243e-04
Loss = 2.2393e-03, PNorm = 184.6998, GNorm = 0.1813, lr_0 = 1.1235e-04
Loss = 4.2347e-03, PNorm = 184.6995, GNorm = 0.0379, lr_0 = 1.1227e-04
Loss = 3.2627e-03, PNorm = 184.7004, GNorm = 0.2079, lr_0 = 1.1219e-04
Loss = 1.0992e-03, PNorm = 184.7010, GNorm = 0.0433, lr_0 = 1.1212e-04
Loss = 4.4051e-03, PNorm = 184.7030, GNorm = 0.0618, lr_0 = 1.1204e-04
Loss = 3.0147e-03, PNorm = 184.7040, GNorm = 0.1001, lr_0 = 1.1196e-04
Loss = 1.1020e-03, PNorm = 184.7051, GNorm = 0.1188, lr_0 = 1.1189e-04
Loss = 1.0700e-03, PNorm = 184.7066, GNorm = 0.1042, lr_0 = 1.1181e-04
Loss = 1.9066e-03, PNorm = 184.7084, GNorm = 0.0795, lr_0 = 1.1173e-04
Loss = 3.4068e-03, PNorm = 184.7088, GNorm = 0.1896, lr_0 = 1.1166e-04
Loss = 2.4155e-03, PNorm = 184.7108, GNorm = 0.0358, lr_0 = 1.1158e-04
Loss = 2.8680e-03, PNorm = 184.7135, GNorm = 0.1021, lr_0 = 1.1150e-04
Loss = 2.6100e-03, PNorm = 184.7153, GNorm = 0.1215, lr_0 = 1.1143e-04
Loss = 2.8763e-03, PNorm = 184.7170, GNorm = 0.0950, lr_0 = 1.1135e-04
Loss = 3.7435e-03, PNorm = 184.7183, GNorm = 0.0541, lr_0 = 1.1128e-04
Loss = 3.4003e-03, PNorm = 184.7185, GNorm = 0.0743, lr_0 = 1.1120e-04
Loss = 2.8671e-03, PNorm = 184.7190, GNorm = 0.0769, lr_0 = 1.1112e-04
Loss = 5.1761e-03, PNorm = 184.7207, GNorm = 0.0683, lr_0 = 1.1105e-04
Loss = 2.5251e-03, PNorm = 184.7239, GNorm = 0.0574, lr_0 = 1.1097e-04
Loss = 4.3275e-03, PNorm = 184.7257, GNorm = 0.0670, lr_0 = 1.1089e-04
Loss = 1.1000e-03, PNorm = 184.7273, GNorm = 0.0443, lr_0 = 1.1082e-04
Loss = 3.3441e-03, PNorm = 184.7299, GNorm = 0.1037, lr_0 = 1.1074e-04
Loss = 5.4373e-03, PNorm = 184.7307, GNorm = 0.1333, lr_0 = 1.1067e-04
Loss = 2.2057e-03, PNorm = 184.7327, GNorm = 0.0675, lr_0 = 1.1059e-04
Loss = 3.5526e-03, PNorm = 184.7354, GNorm = 0.0918, lr_0 = 1.1052e-04
Loss = 4.4045e-03, PNorm = 184.7373, GNorm = 0.1215, lr_0 = 1.1044e-04
Loss = 2.6172e-03, PNorm = 184.7380, GNorm = 0.0912, lr_0 = 1.1036e-04
Loss = 3.8356e-03, PNorm = 184.7400, GNorm = 0.0934, lr_0 = 1.1029e-04
Loss = 3.3323e-03, PNorm = 184.7417, GNorm = 0.1494, lr_0 = 1.1021e-04
Loss = 1.8548e-03, PNorm = 184.7438, GNorm = 0.0671, lr_0 = 1.1014e-04
Loss = 7.0593e-03, PNorm = 184.7463, GNorm = 0.0685, lr_0 = 1.1006e-04
Loss = 2.2962e-03, PNorm = 184.7481, GNorm = 0.0629, lr_0 = 1.0999e-04
Loss = 2.0679e-03, PNorm = 184.7494, GNorm = 0.1254, lr_0 = 1.0991e-04
Loss = 1.9408e-03, PNorm = 184.7504, GNorm = 0.0588, lr_0 = 1.0984e-04
Loss = 2.9896e-03, PNorm = 184.7516, GNorm = 0.0421, lr_0 = 1.0976e-04
Loss = 1.2289e-03, PNorm = 184.7532, GNorm = 0.0837, lr_0 = 1.0969e-04
Loss = 2.2640e-03, PNorm = 184.7539, GNorm = 0.0691, lr_0 = 1.0961e-04
Loss = 4.1691e-03, PNorm = 184.7549, GNorm = 0.1465, lr_0 = 1.0954e-04
Loss = 1.8783e-03, PNorm = 184.7560, GNorm = 0.0686, lr_0 = 1.0946e-04
Loss = 3.6214e-03, PNorm = 184.7576, GNorm = 0.0744, lr_0 = 1.0939e-04
Loss = 7.0830e-03, PNorm = 184.7607, GNorm = 0.0948, lr_0 = 1.0931e-04
Loss = 3.0933e-03, PNorm = 184.7636, GNorm = 0.1095, lr_0 = 1.0924e-04
Loss = 2.2707e-03, PNorm = 184.7647, GNorm = 0.1042, lr_0 = 1.0916e-04
Loss = 3.3057e-03, PNorm = 184.7657, GNorm = 0.1275, lr_0 = 1.0909e-04
Loss = 5.1884e-03, PNorm = 184.7665, GNorm = 0.2342, lr_0 = 1.0901e-04
Loss = 5.4002e-03, PNorm = 184.7671, GNorm = 0.0670, lr_0 = 1.0894e-04
Loss = 2.1459e-03, PNorm = 184.7675, GNorm = 0.1277, lr_0 = 1.0886e-04
Loss = 2.1205e-03, PNorm = 184.7679, GNorm = 0.1357, lr_0 = 1.0879e-04
Loss = 9.6804e-04, PNorm = 184.7687, GNorm = 0.1088, lr_0 = 1.0871e-04
Loss = 5.4276e-03, PNorm = 184.7690, GNorm = 0.0556, lr_0 = 1.0864e-04
Loss = 1.3116e-03, PNorm = 184.7715, GNorm = 0.1421, lr_0 = 1.0856e-04
Validation mae = 0.121186
Epoch 29
Loss = 3.4520e-03, PNorm = 184.7728, GNorm = 0.1589, lr_0 = 1.0849e-04
Loss = 2.6839e-03, PNorm = 184.7740, GNorm = 0.1097, lr_0 = 1.0841e-04
Loss = 3.6549e-03, PNorm = 184.7764, GNorm = 0.0698, lr_0 = 1.0834e-04
Loss = 1.0882e-03, PNorm = 184.7777, GNorm = 0.1834, lr_0 = 1.0827e-04
Loss = 1.1869e-03, PNorm = 184.7779, GNorm = 0.1564, lr_0 = 1.0819e-04
Loss = 3.9906e-03, PNorm = 184.7788, GNorm = 0.0626, lr_0 = 1.0812e-04
Loss = 1.8614e-03, PNorm = 184.7799, GNorm = 0.1102, lr_0 = 1.0804e-04
Loss = 3.2257e-03, PNorm = 184.7803, GNorm = 0.1110, lr_0 = 1.0797e-04
Loss = 1.5253e-03, PNorm = 184.7814, GNorm = 0.0813, lr_0 = 1.0790e-04
Loss = 2.6079e-03, PNorm = 184.7822, GNorm = 0.0600, lr_0 = 1.0782e-04
Loss = 1.8076e-03, PNorm = 184.7831, GNorm = 0.0932, lr_0 = 1.0775e-04
Loss = 3.1691e-03, PNorm = 184.7840, GNorm = 0.1646, lr_0 = 1.0767e-04
Loss = 5.4796e-03, PNorm = 184.7849, GNorm = 1.0355, lr_0 = 1.0760e-04
Loss = 2.0423e-03, PNorm = 184.7856, GNorm = 0.1160, lr_0 = 1.0753e-04
Loss = 1.3734e-03, PNorm = 184.7867, GNorm = 0.0656, lr_0 = 1.0745e-04
Loss = 1.6471e-03, PNorm = 184.7885, GNorm = 0.0430, lr_0 = 1.0738e-04
Loss = 1.8655e-03, PNorm = 184.7900, GNorm = 0.0849, lr_0 = 1.0731e-04
Loss = 2.0711e-03, PNorm = 184.7915, GNorm = 0.0659, lr_0 = 1.0723e-04
Loss = 3.9865e-03, PNorm = 184.7935, GNorm = 0.0650, lr_0 = 1.0716e-04
Loss = 1.1513e-03, PNorm = 184.7944, GNorm = 0.0765, lr_0 = 1.0709e-04
Loss = 6.9472e-03, PNorm = 184.7954, GNorm = 0.1877, lr_0 = 1.0701e-04
Loss = 5.1010e-03, PNorm = 184.7964, GNorm = 0.0320, lr_0 = 1.0694e-04
Loss = 3.9481e-03, PNorm = 184.7980, GNorm = 0.0870, lr_0 = 1.0687e-04
Loss = 1.6390e-03, PNorm = 184.7992, GNorm = 0.0620, lr_0 = 1.0679e-04
Loss = 6.7172e-03, PNorm = 184.8011, GNorm = 0.1729, lr_0 = 1.0672e-04
Loss = 1.6265e-03, PNorm = 184.8031, GNorm = 0.1425, lr_0 = 1.0665e-04
Loss = 4.3983e-03, PNorm = 184.8045, GNorm = 0.2931, lr_0 = 1.0657e-04
Loss = 1.9482e-03, PNorm = 184.8051, GNorm = 0.0942, lr_0 = 1.0650e-04
Loss = 2.2288e-03, PNorm = 184.8065, GNorm = 0.1413, lr_0 = 1.0643e-04
Loss = 6.9372e-03, PNorm = 184.8067, GNorm = 0.1831, lr_0 = 1.0635e-04
Loss = 3.7666e-03, PNorm = 184.8088, GNorm = 0.1127, lr_0 = 1.0628e-04
Loss = 1.6228e-03, PNorm = 184.8114, GNorm = 0.1044, lr_0 = 1.0621e-04
Loss = 1.7264e-03, PNorm = 184.8130, GNorm = 0.1120, lr_0 = 1.0614e-04
Loss = 3.6853e-03, PNorm = 184.8145, GNorm = 0.1897, lr_0 = 1.0606e-04
Loss = 2.5704e-03, PNorm = 184.8159, GNorm = 0.1587, lr_0 = 1.0599e-04
Loss = 1.7920e-03, PNorm = 184.8184, GNorm = 0.1049, lr_0 = 1.0592e-04
Loss = 2.8548e-03, PNorm = 184.8203, GNorm = 0.0522, lr_0 = 1.0585e-04
Loss = 2.8900e-03, PNorm = 184.8212, GNorm = 0.0839, lr_0 = 1.0577e-04
Loss = 1.7172e-03, PNorm = 184.8221, GNorm = 0.0888, lr_0 = 1.0570e-04
Loss = 1.4400e-03, PNorm = 184.8241, GNorm = 0.0533, lr_0 = 1.0563e-04
Loss = 1.1102e-03, PNorm = 184.8262, GNorm = 0.0559, lr_0 = 1.0556e-04
Loss = 1.5111e-03, PNorm = 184.8274, GNorm = 0.0362, lr_0 = 1.0548e-04
Loss = 1.6312e-03, PNorm = 184.8294, GNorm = 0.0849, lr_0 = 1.0541e-04
Loss = 1.1759e-03, PNorm = 184.8301, GNorm = 0.0424, lr_0 = 1.0534e-04
Loss = 2.0119e-03, PNorm = 184.8303, GNorm = 0.0651, lr_0 = 1.0527e-04
Loss = 7.2049e-03, PNorm = 184.8318, GNorm = 0.0697, lr_0 = 1.0519e-04
Loss = 2.8153e-03, PNorm = 184.8343, GNorm = 0.1646, lr_0 = 1.0512e-04
Loss = 3.3080e-03, PNorm = 184.8352, GNorm = 0.0783, lr_0 = 1.0505e-04
Loss = 9.2016e-04, PNorm = 184.8358, GNorm = 0.1125, lr_0 = 1.0498e-04
Loss = 1.3304e-03, PNorm = 184.8363, GNorm = 0.0357, lr_0 = 1.0491e-04
Loss = 2.9720e-03, PNorm = 184.8372, GNorm = 0.0447, lr_0 = 1.0483e-04
Loss = 8.3508e-04, PNorm = 184.8383, GNorm = 0.0579, lr_0 = 1.0476e-04
Loss = 2.8400e-03, PNorm = 184.8396, GNorm = 0.8280, lr_0 = 1.0469e-04
Loss = 1.6150e-03, PNorm = 184.8409, GNorm = 0.1449, lr_0 = 1.0462e-04
Loss = 8.1199e-04, PNorm = 184.8414, GNorm = 0.0881, lr_0 = 1.0455e-04
Loss = 1.1595e-03, PNorm = 184.8424, GNorm = 0.3349, lr_0 = 1.0448e-04
Loss = 2.1964e-03, PNorm = 184.8434, GNorm = 0.0958, lr_0 = 1.0440e-04
Loss = 1.9872e-03, PNorm = 184.8435, GNorm = 0.0455, lr_0 = 1.0433e-04
Loss = 9.2300e-04, PNorm = 184.8443, GNorm = 0.0792, lr_0 = 1.0426e-04
Loss = 1.7726e-03, PNorm = 184.8452, GNorm = 0.0378, lr_0 = 1.0419e-04
Loss = 1.8872e-03, PNorm = 184.8459, GNorm = 0.0404, lr_0 = 1.0412e-04
Loss = 3.3152e-03, PNorm = 184.8466, GNorm = 0.0834, lr_0 = 1.0405e-04
Loss = 2.3718e-03, PNorm = 184.8471, GNorm = 0.0873, lr_0 = 1.0398e-04
Loss = 3.6949e-03, PNorm = 184.8490, GNorm = 0.1313, lr_0 = 1.0391e-04
Loss = 1.3973e-02, PNorm = 184.8508, GNorm = 0.0787, lr_0 = 1.0383e-04
Loss = 7.5434e-03, PNorm = 184.8512, GNorm = 0.1350, lr_0 = 1.0376e-04
Loss = 1.6654e-03, PNorm = 184.8503, GNorm = 0.1362, lr_0 = 1.0369e-04
Loss = 1.5383e-03, PNorm = 184.8508, GNorm = 0.0789, lr_0 = 1.0362e-04
Loss = 1.1710e-03, PNorm = 184.8521, GNorm = 0.0900, lr_0 = 1.0355e-04
Loss = 3.3351e-03, PNorm = 184.8538, GNorm = 0.1053, lr_0 = 1.0348e-04
Loss = 3.9358e-03, PNorm = 184.8556, GNorm = 0.1172, lr_0 = 1.0341e-04
Loss = 3.1822e-03, PNorm = 184.8569, GNorm = 0.0962, lr_0 = 1.0334e-04
Loss = 3.7816e-03, PNorm = 184.8575, GNorm = 0.0714, lr_0 = 1.0327e-04
Loss = 2.6168e-03, PNorm = 184.8584, GNorm = 0.0979, lr_0 = 1.0320e-04
Loss = 4.9768e-03, PNorm = 184.8591, GNorm = 0.1657, lr_0 = 1.0312e-04
Loss = 1.6020e-03, PNorm = 184.8592, GNorm = 0.0671, lr_0 = 1.0305e-04
Loss = 5.4351e-03, PNorm = 184.8593, GNorm = 0.1092, lr_0 = 1.0298e-04
Loss = 2.5286e-03, PNorm = 184.8596, GNorm = 0.0827, lr_0 = 1.0291e-04
Loss = 1.0898e-03, PNorm = 184.8600, GNorm = 0.0630, lr_0 = 1.0284e-04
Loss = 2.5348e-03, PNorm = 184.8605, GNorm = 0.0743, lr_0 = 1.0277e-04
Loss = 2.8290e-03, PNorm = 184.8617, GNorm = 0.0816, lr_0 = 1.0270e-04
Loss = 4.7909e-03, PNorm = 184.8635, GNorm = 0.1842, lr_0 = 1.0263e-04
Loss = 1.9315e-03, PNorm = 184.8647, GNorm = 0.0471, lr_0 = 1.0256e-04
Loss = 3.4976e-03, PNorm = 184.8666, GNorm = 0.3391, lr_0 = 1.0249e-04
Loss = 1.7780e-03, PNorm = 184.8679, GNorm = 0.1125, lr_0 = 1.0242e-04
Loss = 1.7854e-03, PNorm = 184.8685, GNorm = 0.0523, lr_0 = 1.0235e-04
Loss = 1.2899e-03, PNorm = 184.8695, GNorm = 0.0833, lr_0 = 1.0228e-04
Loss = 1.2553e-03, PNorm = 184.8714, GNorm = 0.0334, lr_0 = 1.0221e-04
Loss = 1.1776e-03, PNorm = 184.8730, GNorm = 0.0588, lr_0 = 1.0214e-04
Loss = 1.7536e-03, PNorm = 184.8739, GNorm = 0.1089, lr_0 = 1.0207e-04
Loss = 1.8115e-03, PNorm = 184.8743, GNorm = 0.1246, lr_0 = 1.0200e-04
Loss = 2.4549e-03, PNorm = 184.8751, GNorm = 0.1266, lr_0 = 1.0193e-04
Loss = 1.0984e-02, PNorm = 184.8766, GNorm = 0.0502, lr_0 = 1.0186e-04
Loss = 1.9768e-03, PNorm = 184.8784, GNorm = 0.0384, lr_0 = 1.0179e-04
Loss = 3.7084e-03, PNorm = 184.8782, GNorm = 0.1446, lr_0 = 1.0172e-04
Loss = 2.6898e-03, PNorm = 184.8795, GNorm = 0.2257, lr_0 = 1.0165e-04
Loss = 1.8562e-03, PNorm = 184.8801, GNorm = 0.0597, lr_0 = 1.0158e-04
Loss = 9.7824e-04, PNorm = 184.8810, GNorm = 0.1089, lr_0 = 1.0151e-04
Loss = 4.5830e-03, PNorm = 184.8821, GNorm = 0.0499, lr_0 = 1.0144e-04
Loss = 2.3150e-03, PNorm = 184.8836, GNorm = 0.0960, lr_0 = 1.0137e-04
Loss = 4.4233e-03, PNorm = 184.8847, GNorm = 0.1116, lr_0 = 1.0130e-04
Loss = 3.0165e-03, PNorm = 184.8854, GNorm = 0.1127, lr_0 = 1.0123e-04
Loss = 2.5501e-03, PNorm = 184.8861, GNorm = 0.0656, lr_0 = 1.0116e-04
Loss = 1.8210e-03, PNorm = 184.8866, GNorm = 0.0691, lr_0 = 1.0110e-04
Loss = 6.2537e-03, PNorm = 184.8875, GNorm = 0.1993, lr_0 = 1.0103e-04
Loss = 5.1308e-03, PNorm = 184.8899, GNorm = 0.0718, lr_0 = 1.0096e-04
Loss = 2.3629e-03, PNorm = 184.8917, GNorm = 0.1346, lr_0 = 1.0089e-04
Loss = 1.6253e-03, PNorm = 184.8920, GNorm = 0.0544, lr_0 = 1.0082e-04
Loss = 9.3382e-04, PNorm = 184.8932, GNorm = 0.0485, lr_0 = 1.0075e-04
Loss = 1.7208e-03, PNorm = 184.8939, GNorm = 0.0418, lr_0 = 1.0068e-04
Loss = 8.1965e-04, PNorm = 184.8945, GNorm = 0.0701, lr_0 = 1.0061e-04
Loss = 2.3671e-03, PNorm = 184.8948, GNorm = 0.1761, lr_0 = 1.0054e-04
Loss = 1.5193e-03, PNorm = 184.8943, GNorm = 0.0800, lr_0 = 1.0047e-04
Loss = 5.3272e-03, PNorm = 184.8952, GNorm = 0.0844, lr_0 = 1.0041e-04
Loss = 5.3168e-03, PNorm = 184.8971, GNorm = 0.1346, lr_0 = 1.0034e-04
Loss = 2.1332e-03, PNorm = 184.8979, GNorm = 0.1492, lr_0 = 1.0027e-04
Loss = 9.9356e-04, PNorm = 184.8993, GNorm = 0.0953, lr_0 = 1.0020e-04
Loss = 4.3555e-03, PNorm = 184.9017, GNorm = 0.1217, lr_0 = 1.0013e-04
Loss = 3.4525e-03, PNorm = 184.9043, GNorm = 0.1312, lr_0 = 1.0006e-04
Loss = 1.6231e-03, PNorm = 184.9052, GNorm = 0.0517, lr_0 = 1.0000e-04
Validation mae = 0.121255
Model 0 best validation mae = 0.121186 on epoch 28
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.119969
Ensemble test mae = 0.119969
Fold 3
Splitting data with seed 3
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN()
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=2048, out_features=2100, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=2100, out_features=2100, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.0, inplace=False)
    (7): Linear(in_features=2100, out_features=1, bias=True)
  )
)
Number of parameters = 8,717,101
Moving model to cuda
Epoch 0
Loss = 1.0193e+00, PNorm = 64.6293, GNorm = 2.6202, lr_0 = 1.0413e-04
Loss = 8.1795e-01, PNorm = 64.6406, GNorm = 2.1516, lr_0 = 1.0788e-04
Loss = 7.1388e-01, PNorm = 64.6515, GNorm = 1.8689, lr_0 = 1.1163e-04
Loss = 7.1909e-01, PNorm = 64.6615, GNorm = 1.8543, lr_0 = 1.1537e-04
Loss = 7.0015e-01, PNorm = 64.6710, GNorm = 2.9073, lr_0 = 1.1913e-04
Loss = 7.3766e-01, PNorm = 64.6798, GNorm = 2.5210, lr_0 = 1.2287e-04
Loss = 7.5710e-01, PNorm = 64.6902, GNorm = 2.4022, lr_0 = 1.2663e-04
Loss = 6.0874e-01, PNorm = 64.6987, GNorm = 1.5040, lr_0 = 1.3038e-04
Loss = 5.5427e-01, PNorm = 64.7065, GNorm = 2.1753, lr_0 = 1.3413e-04
Loss = 5.6551e-01, PNorm = 64.7153, GNorm = 2.0446, lr_0 = 1.3788e-04
Loss = 6.5643e-01, PNorm = 64.7256, GNorm = 2.4973, lr_0 = 1.4163e-04
Loss = 6.9715e-01, PNorm = 64.7378, GNorm = 4.4437, lr_0 = 1.4537e-04
Loss = 6.4398e-01, PNorm = 64.7493, GNorm = 4.1985, lr_0 = 1.4913e-04
Loss = 6.2046e-01, PNorm = 64.7608, GNorm = 2.6589, lr_0 = 1.5288e-04
Loss = 6.5204e-01, PNorm = 64.7724, GNorm = 2.3616, lr_0 = 1.5662e-04
Loss = 5.2063e-01, PNorm = 64.7835, GNorm = 2.4287, lr_0 = 1.6038e-04
Loss = 5.6669e-01, PNorm = 64.7963, GNorm = 2.3211, lr_0 = 1.6412e-04
Loss = 6.5129e-01, PNorm = 64.8083, GNorm = 5.0508, lr_0 = 1.6788e-04
Loss = 6.0460e-01, PNorm = 64.8211, GNorm = 2.8142, lr_0 = 1.7163e-04
Loss = 6.5965e-01, PNorm = 64.8356, GNorm = 2.6085, lr_0 = 1.7538e-04
Loss = 6.5650e-01, PNorm = 64.8501, GNorm = 2.1787, lr_0 = 1.7913e-04
Loss = 5.8651e-01, PNorm = 64.8626, GNorm = 1.9368, lr_0 = 1.8288e-04
Loss = 5.8225e-01, PNorm = 64.8763, GNorm = 3.0182, lr_0 = 1.8662e-04
Loss = 6.6048e-01, PNorm = 64.8893, GNorm = 1.5467, lr_0 = 1.9038e-04
Loss = 5.5532e-01, PNorm = 64.9049, GNorm = 1.8099, lr_0 = 1.9413e-04
Loss = 5.7271e-01, PNorm = 64.9191, GNorm = 2.1516, lr_0 = 1.9788e-04
Loss = 5.3014e-01, PNorm = 64.9329, GNorm = 1.7785, lr_0 = 2.0163e-04
Loss = 5.2974e-01, PNorm = 64.9507, GNorm = 2.1364, lr_0 = 2.0537e-04
Loss = 6.3863e-01, PNorm = 64.9654, GNorm = 1.8837, lr_0 = 2.0913e-04
Loss = 5.5157e-01, PNorm = 64.9824, GNorm = 1.8404, lr_0 = 2.1288e-04
Loss = 5.2475e-01, PNorm = 65.0019, GNorm = 2.6669, lr_0 = 2.1663e-04
Loss = 5.8550e-01, PNorm = 65.0200, GNorm = 1.7380, lr_0 = 2.2038e-04
Loss = 5.5013e-01, PNorm = 65.0357, GNorm = 2.1992, lr_0 = 2.2412e-04
Loss = 5.6688e-01, PNorm = 65.0537, GNorm = 1.9544, lr_0 = 2.2787e-04
Loss = 6.7014e-01, PNorm = 65.0719, GNorm = 2.0549, lr_0 = 2.3163e-04
Loss = 6.2630e-01, PNorm = 65.0945, GNorm = 1.9092, lr_0 = 2.3538e-04
Loss = 6.0809e-01, PNorm = 65.1173, GNorm = 1.4423, lr_0 = 2.3913e-04
Loss = 5.3778e-01, PNorm = 65.1365, GNorm = 1.8537, lr_0 = 2.4288e-04
Loss = 5.2340e-01, PNorm = 65.1587, GNorm = 1.6503, lr_0 = 2.4662e-04
Loss = 6.6627e-01, PNorm = 65.1802, GNorm = 1.7723, lr_0 = 2.5038e-04
Loss = 5.4560e-01, PNorm = 65.2025, GNorm = 2.2117, lr_0 = 2.5413e-04
Loss = 6.2006e-01, PNorm = 65.2253, GNorm = 2.3345, lr_0 = 2.5788e-04
Loss = 5.5134e-01, PNorm = 65.2492, GNorm = 3.4344, lr_0 = 2.6163e-04
Loss = 6.0896e-01, PNorm = 65.2728, GNorm = 1.3720, lr_0 = 2.6537e-04
Loss = 6.0432e-01, PNorm = 65.2992, GNorm = 1.7465, lr_0 = 2.6912e-04
Loss = 5.4430e-01, PNorm = 65.3219, GNorm = 1.4899, lr_0 = 2.7288e-04
Loss = 5.8936e-01, PNorm = 65.3443, GNorm = 1.7452, lr_0 = 2.7663e-04
Loss = 5.5467e-01, PNorm = 65.3676, GNorm = 1.4024, lr_0 = 2.8038e-04
Loss = 5.3158e-01, PNorm = 65.3976, GNorm = 1.8027, lr_0 = 2.8413e-04
Loss = 5.1069e-01, PNorm = 65.4209, GNorm = 1.7044, lr_0 = 2.8787e-04
Loss = 5.5171e-01, PNorm = 65.4476, GNorm = 1.8727, lr_0 = 2.9163e-04
Loss = 4.8660e-01, PNorm = 65.4722, GNorm = 1.9435, lr_0 = 2.9538e-04
Loss = 5.1594e-01, PNorm = 65.4971, GNorm = 1.5797, lr_0 = 2.9913e-04
Loss = 5.0455e-01, PNorm = 65.5203, GNorm = 1.4156, lr_0 = 3.0288e-04
Loss = 5.7069e-01, PNorm = 65.5467, GNorm = 1.6823, lr_0 = 3.0662e-04
Loss = 4.8070e-01, PNorm = 65.5747, GNorm = 1.8764, lr_0 = 3.1037e-04
Loss = 5.1630e-01, PNorm = 65.6026, GNorm = 2.8417, lr_0 = 3.1413e-04
Loss = 5.2771e-01, PNorm = 65.6337, GNorm = 1.7674, lr_0 = 3.1788e-04
Loss = 5.3104e-01, PNorm = 65.6657, GNorm = 1.4922, lr_0 = 3.2163e-04
Loss = 5.5375e-01, PNorm = 65.7009, GNorm = 1.7780, lr_0 = 3.2538e-04
Loss = 6.7247e-01, PNorm = 65.7320, GNorm = 1.4628, lr_0 = 3.2912e-04
Loss = 5.7308e-01, PNorm = 65.7661, GNorm = 1.8034, lr_0 = 3.3288e-04
Loss = 5.6837e-01, PNorm = 65.8058, GNorm = 1.4252, lr_0 = 3.3663e-04
Loss = 5.4846e-01, PNorm = 65.8381, GNorm = 1.5232, lr_0 = 3.4038e-04
Loss = 5.3340e-01, PNorm = 65.8704, GNorm = 1.4946, lr_0 = 3.4413e-04
Loss = 5.2890e-01, PNorm = 65.9069, GNorm = 1.4761, lr_0 = 3.4787e-04
Loss = 5.2445e-01, PNorm = 65.9448, GNorm = 1.4687, lr_0 = 3.5162e-04
Loss = 5.2581e-01, PNorm = 65.9783, GNorm = 1.3282, lr_0 = 3.5538e-04
Loss = 5.0670e-01, PNorm = 66.0156, GNorm = 2.1883, lr_0 = 3.5913e-04
Loss = 5.1635e-01, PNorm = 66.0488, GNorm = 1.2649, lr_0 = 3.6288e-04
Loss = 4.8835e-01, PNorm = 66.0862, GNorm = 1.1387, lr_0 = 3.6662e-04
Loss = 5.0851e-01, PNorm = 66.1225, GNorm = 1.6071, lr_0 = 3.7037e-04
Loss = 6.2897e-01, PNorm = 66.1579, GNorm = 1.9082, lr_0 = 3.7413e-04
Loss = 5.0615e-01, PNorm = 66.1987, GNorm = 1.3286, lr_0 = 3.7788e-04
Loss = 5.5623e-01, PNorm = 66.2397, GNorm = 1.4559, lr_0 = 3.8163e-04
Loss = 4.6174e-01, PNorm = 66.2812, GNorm = 1.5396, lr_0 = 3.8537e-04
Loss = 5.1491e-01, PNorm = 66.3201, GNorm = 1.3958, lr_0 = 3.8912e-04
Loss = 4.7442e-01, PNorm = 66.3625, GNorm = 1.0634, lr_0 = 3.9287e-04
Loss = 5.0758e-01, PNorm = 66.3997, GNorm = 1.7337, lr_0 = 3.9663e-04
Loss = 5.1023e-01, PNorm = 66.4429, GNorm = 1.2794, lr_0 = 4.0038e-04
Loss = 5.0666e-01, PNorm = 66.4871, GNorm = 1.7754, lr_0 = 4.0413e-04
Loss = 4.5972e-01, PNorm = 66.5233, GNorm = 1.2481, lr_0 = 4.0787e-04
Loss = 4.9844e-01, PNorm = 66.5627, GNorm = 1.2310, lr_0 = 4.1162e-04
Loss = 5.0258e-01, PNorm = 66.6031, GNorm = 1.3380, lr_0 = 4.1537e-04
Loss = 4.8466e-01, PNorm = 66.6431, GNorm = 1.4192, lr_0 = 4.1913e-04
Loss = 5.8688e-01, PNorm = 66.6865, GNorm = 1.1387, lr_0 = 4.2288e-04
Loss = 4.8110e-01, PNorm = 66.7360, GNorm = 1.2364, lr_0 = 4.2662e-04
Loss = 4.7610e-01, PNorm = 66.7782, GNorm = 1.2263, lr_0 = 4.3037e-04
Loss = 5.5850e-01, PNorm = 66.8237, GNorm = 1.2484, lr_0 = 4.3412e-04
Loss = 5.2934e-01, PNorm = 66.8733, GNorm = 1.1829, lr_0 = 4.3788e-04
Loss = 4.8831e-01, PNorm = 66.9181, GNorm = 1.9424, lr_0 = 4.4163e-04
Loss = 5.0117e-01, PNorm = 66.9683, GNorm = 1.4086, lr_0 = 4.4538e-04
Loss = 5.7044e-01, PNorm = 67.0215, GNorm = 1.3543, lr_0 = 4.4912e-04
Loss = 5.4561e-01, PNorm = 67.0770, GNorm = 1.3106, lr_0 = 4.5287e-04
Loss = 5.5716e-01, PNorm = 67.1361, GNorm = 2.3154, lr_0 = 4.5662e-04
Loss = 4.9240e-01, PNorm = 67.1971, GNorm = 1.6397, lr_0 = 4.6038e-04
Loss = 4.9823e-01, PNorm = 67.2556, GNorm = 1.2575, lr_0 = 4.6413e-04
Loss = 4.3046e-01, PNorm = 67.3117, GNorm = 1.2736, lr_0 = 4.6787e-04
Loss = 4.7054e-01, PNorm = 67.3605, GNorm = 1.4177, lr_0 = 4.7162e-04
Loss = 4.4881e-01, PNorm = 67.4130, GNorm = 2.6709, lr_0 = 4.7537e-04
Loss = 4.7551e-01, PNorm = 67.4641, GNorm = 1.3757, lr_0 = 4.7913e-04
Loss = 5.0955e-01, PNorm = 67.5226, GNorm = 1.6457, lr_0 = 4.8288e-04
Loss = 4.7659e-01, PNorm = 67.5746, GNorm = 1.4542, lr_0 = 4.8663e-04
Loss = 4.9121e-01, PNorm = 67.6291, GNorm = 1.3213, lr_0 = 4.9038e-04
Loss = 5.0699e-01, PNorm = 67.6878, GNorm = 1.2895, lr_0 = 4.9412e-04
Loss = 5.6115e-01, PNorm = 67.7474, GNorm = 1.1428, lr_0 = 4.9788e-04
Loss = 5.5958e-01, PNorm = 67.8129, GNorm = 1.4410, lr_0 = 5.0163e-04
Loss = 4.7073e-01, PNorm = 67.8797, GNorm = 1.3367, lr_0 = 5.0538e-04
Loss = 5.1032e-01, PNorm = 67.9396, GNorm = 1.4786, lr_0 = 5.0913e-04
Loss = 4.9605e-01, PNorm = 68.0105, GNorm = 1.2125, lr_0 = 5.1287e-04
Loss = 5.3011e-01, PNorm = 68.0792, GNorm = 1.5492, lr_0 = 5.1663e-04
Loss = 4.4192e-01, PNorm = 68.1443, GNorm = 1.1967, lr_0 = 5.2038e-04
Loss = 6.0413e-01, PNorm = 68.2051, GNorm = 0.9460, lr_0 = 5.2413e-04
Loss = 5.2932e-01, PNorm = 68.2732, GNorm = 1.4882, lr_0 = 5.2788e-04
Loss = 4.8038e-01, PNorm = 68.3383, GNorm = 1.3807, lr_0 = 5.3162e-04
Loss = 4.6692e-01, PNorm = 68.4060, GNorm = 1.0467, lr_0 = 5.3538e-04
Loss = 4.3788e-01, PNorm = 68.4752, GNorm = 1.5480, lr_0 = 5.3912e-04
Loss = 5.2127e-01, PNorm = 68.5375, GNorm = 1.7275, lr_0 = 5.4288e-04
Loss = 4.8118e-01, PNorm = 68.5986, GNorm = 1.6025, lr_0 = 5.4663e-04
Loss = 5.0414e-01, PNorm = 68.6752, GNorm = 1.3832, lr_0 = 5.5038e-04
Validation mae = 0.128395
Epoch 1
Loss = 3.8259e-01, PNorm = 68.7538, GNorm = 1.9432, lr_0 = 5.5413e-04
Loss = 3.5932e-01, PNorm = 68.8368, GNorm = 1.1661, lr_0 = 5.5787e-04
Loss = 3.9802e-01, PNorm = 68.9091, GNorm = 1.1432, lr_0 = 5.6163e-04
Loss = 3.6140e-01, PNorm = 68.9909, GNorm = 0.8562, lr_0 = 5.6538e-04
Loss = 3.8702e-01, PNorm = 69.0641, GNorm = 1.3116, lr_0 = 5.6913e-04
Loss = 3.9275e-01, PNorm = 69.1480, GNorm = 1.3557, lr_0 = 5.7288e-04
Loss = 4.0928e-01, PNorm = 69.2302, GNorm = 1.2467, lr_0 = 5.7662e-04
Loss = 3.6887e-01, PNorm = 69.3188, GNorm = 1.4074, lr_0 = 5.8038e-04
Loss = 4.4901e-01, PNorm = 69.4089, GNorm = 1.2347, lr_0 = 5.8413e-04
Loss = 4.4809e-01, PNorm = 69.5171, GNorm = 1.7155, lr_0 = 5.8788e-04
Loss = 4.2496e-01, PNorm = 69.6162, GNorm = 1.9371, lr_0 = 5.9163e-04
Loss = 3.7102e-01, PNorm = 69.7084, GNorm = 1.4395, lr_0 = 5.9538e-04
Loss = 4.3515e-01, PNorm = 69.8205, GNorm = 1.4701, lr_0 = 5.9913e-04
Loss = 3.4279e-01, PNorm = 69.9221, GNorm = 0.9647, lr_0 = 6.0288e-04
Loss = 4.4703e-01, PNorm = 70.0288, GNorm = 1.1116, lr_0 = 6.0663e-04
Loss = 4.0347e-01, PNorm = 70.1349, GNorm = 1.8150, lr_0 = 6.1038e-04
Loss = 3.2677e-01, PNorm = 70.2433, GNorm = 1.0814, lr_0 = 6.1413e-04
Loss = 3.3874e-01, PNorm = 70.3490, GNorm = 1.0951, lr_0 = 6.1788e-04
Loss = 3.6540e-01, PNorm = 70.4576, GNorm = 1.0233, lr_0 = 6.2163e-04
Loss = 3.6537e-01, PNorm = 70.5628, GNorm = 1.1863, lr_0 = 6.2538e-04
Loss = 3.6900e-01, PNorm = 70.6659, GNorm = 1.1446, lr_0 = 6.2913e-04
Loss = 4.6954e-01, PNorm = 70.7770, GNorm = 1.4593, lr_0 = 6.3288e-04
Loss = 3.8392e-01, PNorm = 70.8971, GNorm = 1.0412, lr_0 = 6.3663e-04
Loss = 4.1771e-01, PNorm = 71.0055, GNorm = 1.3548, lr_0 = 6.4038e-04
Loss = 4.2288e-01, PNorm = 71.1224, GNorm = 1.4751, lr_0 = 6.4413e-04
Loss = 4.1491e-01, PNorm = 71.2427, GNorm = 1.2499, lr_0 = 6.4788e-04
Loss = 3.9361e-01, PNorm = 71.3619, GNorm = 1.0510, lr_0 = 6.5163e-04
Loss = 4.2309e-01, PNorm = 71.4722, GNorm = 1.2214, lr_0 = 6.5538e-04
Loss = 4.7457e-01, PNorm = 71.5939, GNorm = 1.4988, lr_0 = 6.5913e-04
Loss = 4.4494e-01, PNorm = 71.7089, GNorm = 1.3137, lr_0 = 6.6288e-04
Loss = 4.2229e-01, PNorm = 71.8184, GNorm = 1.6016, lr_0 = 6.6663e-04
Loss = 4.4552e-01, PNorm = 71.9323, GNorm = 1.0929, lr_0 = 6.7038e-04
Loss = 3.9752e-01, PNorm = 72.0486, GNorm = 2.6300, lr_0 = 6.7413e-04
Loss = 4.5134e-01, PNorm = 72.1690, GNorm = 1.3835, lr_0 = 6.7788e-04
Loss = 4.3753e-01, PNorm = 72.2964, GNorm = 1.3026, lr_0 = 6.8163e-04
Loss = 3.8131e-01, PNorm = 72.4274, GNorm = 1.1147, lr_0 = 6.8538e-04
Loss = 4.0855e-01, PNorm = 72.5433, GNorm = 1.2641, lr_0 = 6.8913e-04
Loss = 3.7974e-01, PNorm = 72.6636, GNorm = 1.3260, lr_0 = 6.9288e-04
Loss = 4.1055e-01, PNorm = 72.7777, GNorm = 1.2729, lr_0 = 6.9663e-04
Loss = 3.8493e-01, PNorm = 72.8859, GNorm = 1.1589, lr_0 = 7.0038e-04
Loss = 4.2422e-01, PNorm = 72.9958, GNorm = 1.2951, lr_0 = 7.0413e-04
Loss = 3.9465e-01, PNorm = 73.1135, GNorm = 1.3700, lr_0 = 7.0788e-04
Loss = 4.0885e-01, PNorm = 73.2350, GNorm = 1.2024, lr_0 = 7.1163e-04
Loss = 4.9531e-01, PNorm = 73.3491, GNorm = 2.4595, lr_0 = 7.1538e-04
Loss = 4.2676e-01, PNorm = 73.4802, GNorm = 1.0062, lr_0 = 7.1913e-04
Loss = 3.5071e-01, PNorm = 73.6121, GNorm = 1.3444, lr_0 = 7.2288e-04
Loss = 3.9788e-01, PNorm = 73.7321, GNorm = 1.0045, lr_0 = 7.2663e-04
Loss = 4.5592e-01, PNorm = 73.8669, GNorm = 2.1754, lr_0 = 7.3038e-04
Loss = 4.4938e-01, PNorm = 74.0112, GNorm = 1.3433, lr_0 = 7.3413e-04
Loss = 3.7445e-01, PNorm = 74.1442, GNorm = 0.9385, lr_0 = 7.3788e-04
Loss = 3.8829e-01, PNorm = 74.2734, GNorm = 1.2360, lr_0 = 7.4163e-04
Loss = 4.5063e-01, PNorm = 74.3923, GNorm = 1.0337, lr_0 = 7.4538e-04
Loss = 3.9772e-01, PNorm = 74.5128, GNorm = 1.0624, lr_0 = 7.4913e-04
Loss = 3.9366e-01, PNorm = 74.6554, GNorm = 1.5455, lr_0 = 7.5288e-04
Loss = 4.0812e-01, PNorm = 74.7786, GNorm = 1.0373, lr_0 = 7.5663e-04
Loss = 4.3327e-01, PNorm = 74.9203, GNorm = 1.6719, lr_0 = 7.6038e-04
Loss = 4.1237e-01, PNorm = 75.0519, GNorm = 1.2970, lr_0 = 7.6413e-04
Loss = 3.8476e-01, PNorm = 75.1912, GNorm = 1.2903, lr_0 = 7.6788e-04
Loss = 3.9308e-01, PNorm = 75.3086, GNorm = 1.1190, lr_0 = 7.7163e-04
Loss = 4.2352e-01, PNorm = 75.4293, GNorm = 1.1310, lr_0 = 7.7538e-04
Loss = 4.2167e-01, PNorm = 75.5527, GNorm = 1.0895, lr_0 = 7.7913e-04
Loss = 4.8284e-01, PNorm = 75.6792, GNorm = 1.8434, lr_0 = 7.8288e-04
Loss = 4.0087e-01, PNorm = 75.8218, GNorm = 1.3976, lr_0 = 7.8663e-04
Loss = 3.6673e-01, PNorm = 75.9654, GNorm = 0.9798, lr_0 = 7.9038e-04
Loss = 3.4310e-01, PNorm = 76.1009, GNorm = 0.9294, lr_0 = 7.9413e-04
Loss = 4.0668e-01, PNorm = 76.2273, GNorm = 1.0880, lr_0 = 7.9788e-04
Loss = 4.1382e-01, PNorm = 76.3688, GNorm = 1.0433, lr_0 = 8.0163e-04
Loss = 4.0091e-01, PNorm = 76.5145, GNorm = 1.1781, lr_0 = 8.0538e-04
Loss = 4.7263e-01, PNorm = 76.6534, GNorm = 1.2470, lr_0 = 8.0913e-04
Loss = 4.2549e-01, PNorm = 76.8125, GNorm = 1.2326, lr_0 = 8.1288e-04
Loss = 4.5704e-01, PNorm = 76.9641, GNorm = 0.8866, lr_0 = 8.1663e-04
Loss = 4.1069e-01, PNorm = 77.1276, GNorm = 1.7020, lr_0 = 8.2038e-04
Loss = 4.3696e-01, PNorm = 77.2808, GNorm = 1.1541, lr_0 = 8.2413e-04
Loss = 3.9520e-01, PNorm = 77.4264, GNorm = 1.3924, lr_0 = 8.2788e-04
Loss = 4.3410e-01, PNorm = 77.5691, GNorm = 1.3032, lr_0 = 8.3163e-04
Loss = 4.0266e-01, PNorm = 77.7140, GNorm = 1.1355, lr_0 = 8.3538e-04
Loss = 4.5145e-01, PNorm = 77.8519, GNorm = 1.5756, lr_0 = 8.3913e-04
Loss = 4.2817e-01, PNorm = 77.9990, GNorm = 1.2128, lr_0 = 8.4288e-04
Loss = 4.8333e-01, PNorm = 78.1352, GNorm = 1.0973, lr_0 = 8.4663e-04
Loss = 4.1954e-01, PNorm = 78.2854, GNorm = 1.3399, lr_0 = 8.5038e-04
Loss = 4.0194e-01, PNorm = 78.4108, GNorm = 1.7697, lr_0 = 8.5413e-04
Loss = 3.4934e-01, PNorm = 78.5402, GNorm = 0.8318, lr_0 = 8.5788e-04
Loss = 4.4411e-01, PNorm = 78.6706, GNorm = 2.0032, lr_0 = 8.6163e-04
Loss = 3.7067e-01, PNorm = 78.8066, GNorm = 1.0271, lr_0 = 8.6538e-04
Loss = 4.5966e-01, PNorm = 78.9465, GNorm = 1.2197, lr_0 = 8.6913e-04
Loss = 4.3623e-01, PNorm = 79.0829, GNorm = 1.2209, lr_0 = 8.7288e-04
Loss = 4.1237e-01, PNorm = 79.2507, GNorm = 2.0038, lr_0 = 8.7663e-04
Loss = 4.7065e-01, PNorm = 79.4118, GNorm = 0.9760, lr_0 = 8.8038e-04
Loss = 4.3398e-01, PNorm = 79.5838, GNorm = 1.2342, lr_0 = 8.8413e-04
Loss = 4.1725e-01, PNorm = 79.7365, GNorm = 1.0554, lr_0 = 8.8788e-04
Loss = 4.2277e-01, PNorm = 79.8908, GNorm = 1.4588, lr_0 = 8.9163e-04
Loss = 4.6074e-01, PNorm = 80.0587, GNorm = 1.0579, lr_0 = 8.9538e-04
Loss = 4.6539e-01, PNorm = 80.2201, GNorm = 0.6874, lr_0 = 8.9913e-04
Loss = 4.0255e-01, PNorm = 80.3931, GNorm = 1.3578, lr_0 = 9.0288e-04
Loss = 4.7212e-01, PNorm = 80.5480, GNorm = 0.9872, lr_0 = 9.0663e-04
Loss = 3.9775e-01, PNorm = 80.7023, GNorm = 1.1228, lr_0 = 9.1038e-04
Loss = 4.8439e-01, PNorm = 80.8528, GNorm = 1.4283, lr_0 = 9.1413e-04
Loss = 4.0273e-01, PNorm = 81.0117, GNorm = 0.9858, lr_0 = 9.1788e-04
Loss = 4.1360e-01, PNorm = 81.1565, GNorm = 0.9971, lr_0 = 9.2163e-04
Loss = 4.2776e-01, PNorm = 81.3146, GNorm = 1.3922, lr_0 = 9.2538e-04
Loss = 4.4108e-01, PNorm = 81.4870, GNorm = 0.6830, lr_0 = 9.2913e-04
Loss = 4.4315e-01, PNorm = 81.6521, GNorm = 1.5432, lr_0 = 9.3288e-04
Loss = 4.4297e-01, PNorm = 81.8073, GNorm = 1.0103, lr_0 = 9.3663e-04
Loss = 4.3289e-01, PNorm = 81.9723, GNorm = 0.8935, lr_0 = 9.4038e-04
Loss = 4.1116e-01, PNorm = 82.1444, GNorm = 1.0749, lr_0 = 9.4413e-04
Loss = 4.1244e-01, PNorm = 82.3137, GNorm = 0.9606, lr_0 = 9.4788e-04
Loss = 4.6952e-01, PNorm = 82.4786, GNorm = 1.5406, lr_0 = 9.5163e-04
Loss = 4.7835e-01, PNorm = 82.6550, GNorm = 1.3133, lr_0 = 9.5538e-04
Loss = 4.2304e-01, PNorm = 82.8350, GNorm = 1.1624, lr_0 = 9.5913e-04
Loss = 4.6333e-01, PNorm = 83.0053, GNorm = 0.8701, lr_0 = 9.6288e-04
Loss = 5.1270e-01, PNorm = 83.1804, GNorm = 1.2894, lr_0 = 9.6663e-04
Loss = 4.8312e-01, PNorm = 83.3489, GNorm = 0.8978, lr_0 = 9.7038e-04
Loss = 3.3411e-01, PNorm = 83.5039, GNorm = 1.0295, lr_0 = 9.7413e-04
Loss = 4.1032e-01, PNorm = 83.6394, GNorm = 1.0194, lr_0 = 9.7788e-04
Loss = 4.3800e-01, PNorm = 83.7759, GNorm = 1.9363, lr_0 = 9.8163e-04
Loss = 5.0222e-01, PNorm = 83.9141, GNorm = 1.1612, lr_0 = 9.8537e-04
Loss = 5.1522e-01, PNorm = 84.0763, GNorm = 1.1591, lr_0 = 9.8912e-04
Loss = 4.6966e-01, PNorm = 84.2449, GNorm = 0.9246, lr_0 = 9.9288e-04
Loss = 3.9141e-01, PNorm = 84.4024, GNorm = 0.7722, lr_0 = 9.9663e-04
Loss = 4.0487e-01, PNorm = 84.5728, GNorm = 1.3486, lr_0 = 9.9993e-04
Validation mae = 0.132071
Epoch 2
Loss = 2.5240e-01, PNorm = 84.7299, GNorm = 1.3266, lr_0 = 9.9925e-04
Loss = 2.9590e-01, PNorm = 84.9078, GNorm = 0.8395, lr_0 = 9.9856e-04
Loss = 3.0597e-01, PNorm = 85.0491, GNorm = 1.0442, lr_0 = 9.9788e-04
Loss = 2.8445e-01, PNorm = 85.2107, GNorm = 1.1118, lr_0 = 9.9719e-04
Loss = 3.5737e-01, PNorm = 85.3751, GNorm = 1.8603, lr_0 = 9.9651e-04
Loss = 2.6989e-01, PNorm = 85.5322, GNorm = 0.6821, lr_0 = 9.9583e-04
Loss = 2.8128e-01, PNorm = 85.7006, GNorm = 1.5054, lr_0 = 9.9515e-04
Loss = 2.6526e-01, PNorm = 85.8741, GNorm = 0.7538, lr_0 = 9.9446e-04
Loss = 2.8598e-01, PNorm = 86.0498, GNorm = 1.2502, lr_0 = 9.9378e-04
Loss = 2.5998e-01, PNorm = 86.2222, GNorm = 0.8637, lr_0 = 9.9310e-04
Loss = 2.6185e-01, PNorm = 86.3924, GNorm = 0.9704, lr_0 = 9.9242e-04
Loss = 2.7466e-01, PNorm = 86.5618, GNorm = 0.8442, lr_0 = 9.9174e-04
Loss = 2.8108e-01, PNorm = 86.7336, GNorm = 1.0006, lr_0 = 9.9106e-04
Loss = 2.8177e-01, PNorm = 86.9094, GNorm = 1.0109, lr_0 = 9.9038e-04
Loss = 3.1682e-01, PNorm = 87.0837, GNorm = 0.9804, lr_0 = 9.8971e-04
Loss = 2.6631e-01, PNorm = 87.2749, GNorm = 0.9653, lr_0 = 9.8903e-04
Loss = 3.2674e-01, PNorm = 87.4400, GNorm = 1.3146, lr_0 = 9.8835e-04
Loss = 3.0001e-01, PNorm = 87.6236, GNorm = 0.8881, lr_0 = 9.8767e-04
Loss = 2.5320e-01, PNorm = 87.8114, GNorm = 1.3737, lr_0 = 9.8700e-04
Loss = 2.5112e-01, PNorm = 87.9775, GNorm = 1.2769, lr_0 = 9.8632e-04
Loss = 3.3311e-01, PNorm = 88.1606, GNorm = 1.0482, lr_0 = 9.8564e-04
Loss = 2.9515e-01, PNorm = 88.3236, GNorm = 1.1141, lr_0 = 9.8497e-04
Loss = 2.8614e-01, PNorm = 88.4994, GNorm = 1.1935, lr_0 = 9.8429e-04
Loss = 2.7213e-01, PNorm = 88.6726, GNorm = 0.9053, lr_0 = 9.8362e-04
Loss = 3.2683e-01, PNorm = 88.8572, GNorm = 0.9340, lr_0 = 9.8295e-04
Loss = 3.1124e-01, PNorm = 89.0638, GNorm = 1.0177, lr_0 = 9.8227e-04
Loss = 3.0341e-01, PNorm = 89.2500, GNorm = 1.1388, lr_0 = 9.8160e-04
Loss = 3.2405e-01, PNorm = 89.4391, GNorm = 1.0288, lr_0 = 9.8093e-04
Loss = 3.4189e-01, PNorm = 89.6254, GNorm = 1.2562, lr_0 = 9.8026e-04
Loss = 2.7220e-01, PNorm = 89.8090, GNorm = 1.1234, lr_0 = 9.7958e-04
Loss = 3.3482e-01, PNorm = 89.9833, GNorm = 1.1244, lr_0 = 9.7891e-04
Loss = 3.1328e-01, PNorm = 90.1770, GNorm = 0.8987, lr_0 = 9.7824e-04
Loss = 3.0441e-01, PNorm = 90.3537, GNorm = 0.6647, lr_0 = 9.7757e-04
Loss = 3.5390e-01, PNorm = 90.5283, GNorm = 1.1377, lr_0 = 9.7690e-04
Loss = 3.4005e-01, PNorm = 90.7002, GNorm = 1.1609, lr_0 = 9.7623e-04
Loss = 3.0204e-01, PNorm = 90.8696, GNorm = 1.8277, lr_0 = 9.7556e-04
Loss = 3.3511e-01, PNorm = 91.0551, GNorm = 1.9194, lr_0 = 9.7490e-04
Loss = 3.0412e-01, PNorm = 91.2306, GNorm = 1.0742, lr_0 = 9.7423e-04
Loss = 3.2696e-01, PNorm = 91.4071, GNorm = 1.0896, lr_0 = 9.7356e-04
Loss = 3.0732e-01, PNorm = 91.5854, GNorm = 1.2876, lr_0 = 9.7289e-04
Loss = 3.3281e-01, PNorm = 91.7684, GNorm = 1.6870, lr_0 = 9.7223e-04
Loss = 3.3982e-01, PNorm = 91.9409, GNorm = 1.1082, lr_0 = 9.7156e-04
Loss = 2.9183e-01, PNorm = 92.1339, GNorm = 0.8107, lr_0 = 9.7090e-04
Loss = 3.1143e-01, PNorm = 92.3016, GNorm = 0.7676, lr_0 = 9.7023e-04
Loss = 3.1039e-01, PNorm = 92.4663, GNorm = 0.8504, lr_0 = 9.6957e-04
Loss = 2.9395e-01, PNorm = 92.6360, GNorm = 0.9953, lr_0 = 9.6890e-04
Loss = 2.8911e-01, PNorm = 92.7934, GNorm = 0.8069, lr_0 = 9.6824e-04
Loss = 3.0561e-01, PNorm = 92.9587, GNorm = 0.9396, lr_0 = 9.6757e-04
Loss = 3.1952e-01, PNorm = 93.1185, GNorm = 1.3951, lr_0 = 9.6691e-04
Loss = 3.3168e-01, PNorm = 93.2840, GNorm = 0.7962, lr_0 = 9.6625e-04
Loss = 2.7395e-01, PNorm = 93.4425, GNorm = 1.0145, lr_0 = 9.6559e-04
Loss = 2.9792e-01, PNorm = 93.6030, GNorm = 1.2946, lr_0 = 9.6493e-04
Loss = 3.1464e-01, PNorm = 93.7814, GNorm = 1.6467, lr_0 = 9.6427e-04
Loss = 3.4212e-01, PNorm = 93.9562, GNorm = 0.8840, lr_0 = 9.6360e-04
Loss = 3.4428e-01, PNorm = 94.1402, GNorm = 1.4059, lr_0 = 9.6294e-04
Loss = 3.2604e-01, PNorm = 94.3143, GNorm = 1.3913, lr_0 = 9.6228e-04
Loss = 3.2704e-01, PNorm = 94.4977, GNorm = 1.4629, lr_0 = 9.6163e-04
Loss = 3.0370e-01, PNorm = 94.6615, GNorm = 0.9115, lr_0 = 9.6097e-04
Loss = 3.5641e-01, PNorm = 94.8298, GNorm = 0.8150, lr_0 = 9.6031e-04
Loss = 2.9826e-01, PNorm = 95.0044, GNorm = 0.9569, lr_0 = 9.5965e-04
Loss = 3.3132e-01, PNorm = 95.1749, GNorm = 1.0742, lr_0 = 9.5899e-04
Loss = 3.2997e-01, PNorm = 95.3192, GNorm = 1.7133, lr_0 = 9.5834e-04
Loss = 3.3815e-01, PNorm = 95.4840, GNorm = 1.2585, lr_0 = 9.5768e-04
Loss = 3.2009e-01, PNorm = 95.6386, GNorm = 1.4335, lr_0 = 9.5702e-04
Loss = 3.5235e-01, PNorm = 95.8022, GNorm = 1.3515, lr_0 = 9.5637e-04
Loss = 3.3689e-01, PNorm = 95.9740, GNorm = 1.6470, lr_0 = 9.5571e-04
Loss = 3.2658e-01, PNorm = 96.1436, GNorm = 1.2797, lr_0 = 9.5506e-04
Loss = 3.0502e-01, PNorm = 96.3071, GNorm = 1.1536, lr_0 = 9.5440e-04
Loss = 3.5452e-01, PNorm = 96.4708, GNorm = 1.2709, lr_0 = 9.5375e-04
Loss = 3.4545e-01, PNorm = 96.6430, GNorm = 1.0888, lr_0 = 9.5310e-04
Loss = 3.2205e-01, PNorm = 96.8189, GNorm = 1.3327, lr_0 = 9.5244e-04
Loss = 3.9269e-01, PNorm = 97.0121, GNorm = 1.2613, lr_0 = 9.5179e-04
Loss = 3.2125e-01, PNorm = 97.1914, GNorm = 1.8465, lr_0 = 9.5114e-04
Loss = 3.7091e-01, PNorm = 97.3704, GNorm = 1.6000, lr_0 = 9.5049e-04
Loss = 3.2079e-01, PNorm = 97.5415, GNorm = 1.3176, lr_0 = 9.4984e-04
Loss = 3.3296e-01, PNorm = 97.7172, GNorm = 1.5243, lr_0 = 9.4919e-04
Loss = 2.9289e-01, PNorm = 97.8700, GNorm = 1.0220, lr_0 = 9.4854e-04
Loss = 2.8850e-01, PNorm = 98.0247, GNorm = 0.9059, lr_0 = 9.4789e-04
Loss = 3.2229e-01, PNorm = 98.1808, GNorm = 0.8944, lr_0 = 9.4724e-04
Loss = 3.3863e-01, PNorm = 98.3404, GNorm = 1.2092, lr_0 = 9.4659e-04
Loss = 3.3152e-01, PNorm = 98.5200, GNorm = 1.5032, lr_0 = 9.4594e-04
Loss = 3.1699e-01, PNorm = 98.6774, GNorm = 1.1197, lr_0 = 9.4529e-04
Loss = 3.4931e-01, PNorm = 98.8443, GNorm = 1.0731, lr_0 = 9.4464e-04
Loss = 3.5746e-01, PNorm = 99.0115, GNorm = 1.6880, lr_0 = 9.4400e-04
Loss = 3.5557e-01, PNorm = 99.1770, GNorm = 1.0407, lr_0 = 9.4335e-04
Loss = 3.2358e-01, PNorm = 99.3395, GNorm = 0.8817, lr_0 = 9.4270e-04
Loss = 2.9775e-01, PNorm = 99.4994, GNorm = 1.0261, lr_0 = 9.4206e-04
Loss = 3.4211e-01, PNorm = 99.6392, GNorm = 1.2279, lr_0 = 9.4141e-04
Loss = 3.5118e-01, PNorm = 99.7922, GNorm = 1.0331, lr_0 = 9.4077e-04
Loss = 3.2080e-01, PNorm = 99.9502, GNorm = 0.8261, lr_0 = 9.4012e-04
Loss = 2.9852e-01, PNorm = 100.1019, GNorm = 1.2459, lr_0 = 9.3948e-04
Loss = 3.6727e-01, PNorm = 100.2479, GNorm = 1.2065, lr_0 = 9.3884e-04
Loss = 3.6402e-01, PNorm = 100.4042, GNorm = 1.3409, lr_0 = 9.3819e-04
Loss = 3.4974e-01, PNorm = 100.5566, GNorm = 1.1631, lr_0 = 9.3755e-04
Loss = 3.2503e-01, PNorm = 100.7153, GNorm = 0.9580, lr_0 = 9.3691e-04
Loss = 2.8975e-01, PNorm = 100.8594, GNorm = 0.9879, lr_0 = 9.3627e-04
Loss = 3.2776e-01, PNorm = 100.9979, GNorm = 0.9826, lr_0 = 9.3562e-04
Loss = 3.3879e-01, PNorm = 101.1590, GNorm = 1.0074, lr_0 = 9.3498e-04
Loss = 3.0036e-01, PNorm = 101.3095, GNorm = 1.2910, lr_0 = 9.3434e-04
Loss = 3.0851e-01, PNorm = 101.4487, GNorm = 0.8604, lr_0 = 9.3370e-04
Loss = 2.7398e-01, PNorm = 101.5845, GNorm = 0.9624, lr_0 = 9.3306e-04
Loss = 2.8897e-01, PNorm = 101.7291, GNorm = 1.0412, lr_0 = 9.3242e-04
Loss = 3.1241e-01, PNorm = 101.8644, GNorm = 0.8516, lr_0 = 9.3178e-04
Loss = 2.8519e-01, PNorm = 101.9943, GNorm = 1.1305, lr_0 = 9.3115e-04
Loss = 3.2593e-01, PNorm = 102.1235, GNorm = 0.9631, lr_0 = 9.3051e-04
Loss = 3.1783e-01, PNorm = 102.2533, GNorm = 1.1933, lr_0 = 9.2987e-04
Loss = 3.2756e-01, PNorm = 102.3947, GNorm = 0.9359, lr_0 = 9.2923e-04
Loss = 3.6153e-01, PNorm = 102.5428, GNorm = 1.1088, lr_0 = 9.2860e-04
Loss = 3.2592e-01, PNorm = 102.6926, GNorm = 0.9236, lr_0 = 9.2796e-04
Loss = 3.2712e-01, PNorm = 102.8278, GNorm = 1.0560, lr_0 = 9.2733e-04
Loss = 3.5619e-01, PNorm = 102.9816, GNorm = 0.9380, lr_0 = 9.2669e-04
Loss = 3.4590e-01, PNorm = 103.1196, GNorm = 0.9582, lr_0 = 9.2606e-04
Loss = 3.1597e-01, PNorm = 103.2670, GNorm = 1.3489, lr_0 = 9.2542e-04
Loss = 3.1412e-01, PNorm = 103.4118, GNorm = 0.9039, lr_0 = 9.2479e-04
Loss = 3.1871e-01, PNorm = 103.5587, GNorm = 0.8895, lr_0 = 9.2415e-04
Loss = 3.0621e-01, PNorm = 103.7061, GNorm = 0.9149, lr_0 = 9.2352e-04
Loss = 3.1672e-01, PNorm = 103.8479, GNorm = 1.2170, lr_0 = 9.2289e-04
Loss = 3.0144e-01, PNorm = 103.9934, GNorm = 1.4212, lr_0 = 9.2226e-04
Loss = 2.9814e-01, PNorm = 104.1394, GNorm = 0.8857, lr_0 = 9.2162e-04
Loss = 3.4236e-01, PNorm = 104.2821, GNorm = 1.1039, lr_0 = 9.2099e-04
Validation mae = 0.124896
Epoch 3
Loss = 2.0268e-01, PNorm = 104.4144, GNorm = 0.9141, lr_0 = 9.2036e-04
Loss = 1.8150e-01, PNorm = 104.5396, GNorm = 1.1127, lr_0 = 9.1973e-04
Loss = 1.9212e-01, PNorm = 104.6411, GNorm = 1.4161, lr_0 = 9.1910e-04
Loss = 1.6770e-01, PNorm = 104.7367, GNorm = 0.9059, lr_0 = 9.1847e-04
Loss = 1.9948e-01, PNorm = 104.8398, GNorm = 0.6929, lr_0 = 9.1784e-04
Loss = 1.6045e-01, PNorm = 104.9363, GNorm = 0.7511, lr_0 = 9.1721e-04
Loss = 1.7986e-01, PNorm = 105.0271, GNorm = 0.7911, lr_0 = 9.1658e-04
Loss = 1.5450e-01, PNorm = 105.1192, GNorm = 0.8205, lr_0 = 9.1596e-04
Loss = 1.4455e-01, PNorm = 105.1953, GNorm = 0.6720, lr_0 = 9.1533e-04
Loss = 1.5917e-01, PNorm = 105.2799, GNorm = 0.4860, lr_0 = 9.1470e-04
Loss = 1.7975e-01, PNorm = 105.3615, GNorm = 0.7940, lr_0 = 9.1408e-04
Loss = 2.0644e-01, PNorm = 105.4650, GNorm = 1.7265, lr_0 = 9.1345e-04
Loss = 1.9233e-01, PNorm = 105.5646, GNorm = 0.8920, lr_0 = 9.1282e-04
Loss = 2.0252e-01, PNorm = 105.6803, GNorm = 0.7694, lr_0 = 9.1220e-04
Loss = 1.8692e-01, PNorm = 105.7881, GNorm = 0.8011, lr_0 = 9.1157e-04
Loss = 2.0348e-01, PNorm = 105.8993, GNorm = 0.9026, lr_0 = 9.1095e-04
Loss = 1.7487e-01, PNorm = 106.0155, GNorm = 0.7694, lr_0 = 9.1032e-04
Loss = 1.8115e-01, PNorm = 106.1203, GNorm = 0.7630, lr_0 = 9.0970e-04
Loss = 1.9241e-01, PNorm = 106.2345, GNorm = 0.8121, lr_0 = 9.0908e-04
Loss = 1.6035e-01, PNorm = 106.3360, GNorm = 0.8024, lr_0 = 9.0846e-04
Loss = 1.7182e-01, PNorm = 106.4430, GNorm = 0.9036, lr_0 = 9.0783e-04
Loss = 1.5854e-01, PNorm = 106.5407, GNorm = 1.1052, lr_0 = 9.0721e-04
Loss = 1.8287e-01, PNorm = 106.6550, GNorm = 0.6434, lr_0 = 9.0659e-04
Loss = 1.7184e-01, PNorm = 106.7622, GNorm = 1.5530, lr_0 = 9.0597e-04
Loss = 2.1086e-01, PNorm = 106.8690, GNorm = 1.0651, lr_0 = 9.0535e-04
Loss = 2.0185e-01, PNorm = 106.9881, GNorm = 1.3517, lr_0 = 9.0473e-04
Loss = 1.7366e-01, PNorm = 107.1048, GNorm = 0.8986, lr_0 = 9.0411e-04
Loss = 2.0162e-01, PNorm = 107.2094, GNorm = 0.9705, lr_0 = 9.0349e-04
Loss = 1.9680e-01, PNorm = 107.3236, GNorm = 1.3854, lr_0 = 9.0287e-04
Loss = 1.8742e-01, PNorm = 107.4393, GNorm = 1.1868, lr_0 = 9.0225e-04
Loss = 2.0191e-01, PNorm = 107.5557, GNorm = 0.9963, lr_0 = 9.0163e-04
Loss = 1.7335e-01, PNorm = 107.6795, GNorm = 0.7577, lr_0 = 9.0102e-04
Loss = 1.9406e-01, PNorm = 107.7993, GNorm = 1.1513, lr_0 = 9.0040e-04
Loss = 1.9601e-01, PNorm = 107.9208, GNorm = 0.9212, lr_0 = 8.9978e-04
Loss = 1.8331e-01, PNorm = 108.0589, GNorm = 0.5881, lr_0 = 8.9916e-04
Loss = 1.9315e-01, PNorm = 108.1629, GNorm = 0.5501, lr_0 = 8.9855e-04
Loss = 1.9893e-01, PNorm = 108.2875, GNorm = 1.3170, lr_0 = 8.9793e-04
Loss = 1.9477e-01, PNorm = 108.3957, GNorm = 0.6331, lr_0 = 8.9732e-04
Loss = 1.8795e-01, PNorm = 108.5089, GNorm = 1.1009, lr_0 = 8.9670e-04
Loss = 1.8629e-01, PNorm = 108.6341, GNorm = 1.1481, lr_0 = 8.9609e-04
Loss = 2.1822e-01, PNorm = 108.7490, GNorm = 0.7676, lr_0 = 8.9548e-04
Loss = 1.6711e-01, PNorm = 108.8620, GNorm = 0.8910, lr_0 = 8.9486e-04
Loss = 2.2308e-01, PNorm = 108.9883, GNorm = 0.9344, lr_0 = 8.9425e-04
Loss = 1.9102e-01, PNorm = 109.1083, GNorm = 1.0567, lr_0 = 8.9364e-04
Loss = 1.7488e-01, PNorm = 109.2332, GNorm = 0.7676, lr_0 = 8.9302e-04
Loss = 2.0782e-01, PNorm = 109.3560, GNorm = 0.8276, lr_0 = 8.9241e-04
Loss = 1.7747e-01, PNorm = 109.4804, GNorm = 0.7242, lr_0 = 8.9180e-04
Loss = 1.9442e-01, PNorm = 109.6129, GNorm = 0.7724, lr_0 = 8.9119e-04
Loss = 1.9785e-01, PNorm = 109.7382, GNorm = 0.5411, lr_0 = 8.9058e-04
Loss = 1.9282e-01, PNorm = 109.8492, GNorm = 0.8926, lr_0 = 8.8997e-04
Loss = 1.7411e-01, PNorm = 109.9749, GNorm = 0.7618, lr_0 = 8.8936e-04
Loss = 2.0689e-01, PNorm = 110.0951, GNorm = 0.7745, lr_0 = 8.8875e-04
Loss = 1.8565e-01, PNorm = 110.2228, GNorm = 0.7401, lr_0 = 8.8814e-04
Loss = 1.9617e-01, PNorm = 110.3434, GNorm = 1.6169, lr_0 = 8.8753e-04
Loss = 2.2210e-01, PNorm = 110.4609, GNorm = 0.7984, lr_0 = 8.8693e-04
Loss = 2.1436e-01, PNorm = 110.5942, GNorm = 1.0379, lr_0 = 8.8632e-04
Loss = 2.1979e-01, PNorm = 110.7236, GNorm = 1.6697, lr_0 = 8.8571e-04
Loss = 2.0818e-01, PNorm = 110.8453, GNorm = 0.7674, lr_0 = 8.8510e-04
Loss = 1.4934e-01, PNorm = 110.9713, GNorm = 0.8200, lr_0 = 8.8450e-04
Loss = 2.6972e-01, PNorm = 111.0861, GNorm = 0.9434, lr_0 = 8.8389e-04
Loss = 2.1026e-01, PNorm = 111.2188, GNorm = 1.5593, lr_0 = 8.8329e-04
Loss = 2.0804e-01, PNorm = 111.3427, GNorm = 1.3324, lr_0 = 8.8268e-04
Loss = 1.8619e-01, PNorm = 111.4612, GNorm = 1.4464, lr_0 = 8.8208e-04
Loss = 2.3176e-01, PNorm = 111.5803, GNorm = 0.7319, lr_0 = 8.8147e-04
Loss = 2.0313e-01, PNorm = 111.7178, GNorm = 0.9598, lr_0 = 8.8087e-04
Loss = 2.0925e-01, PNorm = 111.8407, GNorm = 0.9196, lr_0 = 8.8026e-04
Loss = 1.7860e-01, PNorm = 111.9675, GNorm = 0.6509, lr_0 = 8.7966e-04
Loss = 2.1062e-01, PNorm = 112.0725, GNorm = 0.9969, lr_0 = 8.7906e-04
Loss = 1.8639e-01, PNorm = 112.1956, GNorm = 0.9434, lr_0 = 8.7846e-04
Loss = 2.0419e-01, PNorm = 112.3218, GNorm = 1.0006, lr_0 = 8.7785e-04
Loss = 1.9225e-01, PNorm = 112.4482, GNorm = 0.9602, lr_0 = 8.7725e-04
Loss = 2.0156e-01, PNorm = 112.5752, GNorm = 1.6709, lr_0 = 8.7665e-04
Loss = 2.0040e-01, PNorm = 112.6993, GNorm = 1.5023, lr_0 = 8.7605e-04
Loss = 1.8614e-01, PNorm = 112.8364, GNorm = 0.9983, lr_0 = 8.7545e-04
Loss = 2.5687e-01, PNorm = 112.9594, GNorm = 0.9665, lr_0 = 8.7485e-04
Loss = 1.9221e-01, PNorm = 113.0950, GNorm = 1.0242, lr_0 = 8.7425e-04
Loss = 2.0607e-01, PNorm = 113.2110, GNorm = 1.6421, lr_0 = 8.7365e-04
Loss = 2.1192e-01, PNorm = 113.3408, GNorm = 0.9220, lr_0 = 8.7306e-04
Loss = 1.9944e-01, PNorm = 113.4597, GNorm = 1.4072, lr_0 = 8.7246e-04
Loss = 2.2983e-01, PNorm = 113.5866, GNorm = 0.8276, lr_0 = 8.7186e-04
Loss = 2.0696e-01, PNorm = 113.7105, GNorm = 0.7732, lr_0 = 8.7126e-04
Loss = 2.1515e-01, PNorm = 113.8314, GNorm = 1.6278, lr_0 = 8.7067e-04
Loss = 2.3088e-01, PNorm = 113.9610, GNorm = 1.0593, lr_0 = 8.7007e-04
Loss = 2.1329e-01, PNorm = 114.0771, GNorm = 1.1808, lr_0 = 8.6947e-04
Loss = 1.9112e-01, PNorm = 114.1928, GNorm = 0.9369, lr_0 = 8.6888e-04
Loss = 1.5367e-01, PNorm = 114.3044, GNorm = 0.8882, lr_0 = 8.6828e-04
Loss = 2.3038e-01, PNorm = 114.4230, GNorm = 0.7531, lr_0 = 8.6769e-04
Loss = 2.3635e-01, PNorm = 114.5486, GNorm = 0.8408, lr_0 = 8.6709e-04
Loss = 2.0566e-01, PNorm = 114.6849, GNorm = 1.1467, lr_0 = 8.6650e-04
Loss = 1.9638e-01, PNorm = 114.8210, GNorm = 0.8322, lr_0 = 8.6590e-04
Loss = 1.9899e-01, PNorm = 114.9539, GNorm = 0.7313, lr_0 = 8.6531e-04
Loss = 2.3186e-01, PNorm = 115.0800, GNorm = 1.2231, lr_0 = 8.6472e-04
Loss = 2.3380e-01, PNorm = 115.2091, GNorm = 1.3818, lr_0 = 8.6413e-04
Loss = 2.3059e-01, PNorm = 115.3387, GNorm = 0.6318, lr_0 = 8.6353e-04
Loss = 1.9395e-01, PNorm = 115.4759, GNorm = 0.9581, lr_0 = 8.6294e-04
Loss = 2.1070e-01, PNorm = 115.6063, GNorm = 1.1514, lr_0 = 8.6235e-04
Loss = 2.1161e-01, PNorm = 115.7268, GNorm = 1.2903, lr_0 = 8.6176e-04
Loss = 1.7645e-01, PNorm = 115.8576, GNorm = 0.6272, lr_0 = 8.6117e-04
Loss = 2.1994e-01, PNorm = 115.9769, GNorm = 1.3430, lr_0 = 8.6058e-04
Loss = 2.2308e-01, PNorm = 116.1116, GNorm = 0.9773, lr_0 = 8.5999e-04
Loss = 2.1101e-01, PNorm = 116.2388, GNorm = 0.7779, lr_0 = 8.5940e-04
Loss = 2.2992e-01, PNorm = 116.3653, GNorm = 0.7695, lr_0 = 8.5881e-04
Loss = 2.2928e-01, PNorm = 116.4904, GNorm = 1.0258, lr_0 = 8.5823e-04
Loss = 1.9335e-01, PNorm = 116.6112, GNorm = 0.7508, lr_0 = 8.5764e-04
Loss = 2.0257e-01, PNorm = 116.7359, GNorm = 0.7846, lr_0 = 8.5705e-04
Loss = 2.0144e-01, PNorm = 116.8638, GNorm = 0.5961, lr_0 = 8.5646e-04
Loss = 2.0716e-01, PNorm = 116.9930, GNorm = 0.7285, lr_0 = 8.5588e-04
Loss = 2.3694e-01, PNorm = 117.1197, GNorm = 1.2235, lr_0 = 8.5529e-04
Loss = 2.3219e-01, PNorm = 117.2468, GNorm = 0.9535, lr_0 = 8.5470e-04
Loss = 1.9513e-01, PNorm = 117.3665, GNorm = 0.6129, lr_0 = 8.5412e-04
Loss = 2.2140e-01, PNorm = 117.4818, GNorm = 1.4531, lr_0 = 8.5353e-04
Loss = 2.1930e-01, PNorm = 117.5946, GNorm = 1.0145, lr_0 = 8.5295e-04
Loss = 2.3284e-01, PNorm = 117.7151, GNorm = 0.8774, lr_0 = 8.5236e-04
Loss = 2.1899e-01, PNorm = 117.8431, GNorm = 1.0026, lr_0 = 8.5178e-04
Loss = 2.2811e-01, PNorm = 117.9683, GNorm = 0.8479, lr_0 = 8.5120e-04
Loss = 2.8809e-01, PNorm = 118.0967, GNorm = 2.5532, lr_0 = 8.5061e-04
Loss = 2.2106e-01, PNorm = 118.2231, GNorm = 1.0125, lr_0 = 8.5003e-04
Loss = 2.8858e-01, PNorm = 118.3649, GNorm = 0.8269, lr_0 = 8.4945e-04
Loss = 2.4132e-01, PNorm = 118.5032, GNorm = 0.9061, lr_0 = 8.4887e-04
Loss = 2.0619e-01, PNorm = 118.6305, GNorm = 0.6172, lr_0 = 8.4828e-04
Validation mae = 0.125483
Epoch 4
Loss = 1.2662e-01, PNorm = 118.7449, GNorm = 0.6312, lr_0 = 8.4770e-04
Loss = 1.2980e-01, PNorm = 118.8400, GNorm = 0.5782, lr_0 = 8.4712e-04
Loss = 1.2558e-01, PNorm = 118.9309, GNorm = 0.7672, lr_0 = 8.4654e-04
Loss = 1.2227e-01, PNorm = 119.0135, GNorm = 0.9117, lr_0 = 8.4596e-04
Loss = 1.3264e-01, PNorm = 119.0888, GNorm = 0.6587, lr_0 = 8.4538e-04
Loss = 1.2093e-01, PNorm = 119.1745, GNorm = 0.6921, lr_0 = 8.4480e-04
Loss = 1.2462e-01, PNorm = 119.2559, GNorm = 0.7974, lr_0 = 8.4423e-04
Loss = 1.1871e-01, PNorm = 119.3384, GNorm = 0.5916, lr_0 = 8.4365e-04
Loss = 1.2250e-01, PNorm = 119.4104, GNorm = 0.8104, lr_0 = 8.4307e-04
Loss = 1.1409e-01, PNorm = 119.4767, GNorm = 0.7157, lr_0 = 8.4249e-04
Loss = 1.1657e-01, PNorm = 119.5516, GNorm = 0.5451, lr_0 = 8.4191e-04
Loss = 9.9227e-02, PNorm = 119.6172, GNorm = 0.5202, lr_0 = 8.4134e-04
Loss = 1.2679e-01, PNorm = 119.6894, GNorm = 0.6470, lr_0 = 8.4076e-04
Loss = 1.1721e-01, PNorm = 119.7589, GNorm = 0.6224, lr_0 = 8.4019e-04
Loss = 1.0526e-01, PNorm = 119.8407, GNorm = 0.6108, lr_0 = 8.3961e-04
Loss = 1.3027e-01, PNorm = 119.9069, GNorm = 0.6451, lr_0 = 8.3903e-04
Loss = 1.1180e-01, PNorm = 119.9847, GNorm = 0.5649, lr_0 = 8.3846e-04
Loss = 9.9637e-02, PNorm = 120.0589, GNorm = 0.6694, lr_0 = 8.3789e-04
Loss = 1.1455e-01, PNorm = 120.1270, GNorm = 0.8503, lr_0 = 8.3731e-04
Loss = 1.2017e-01, PNorm = 120.1955, GNorm = 0.6488, lr_0 = 8.3674e-04
Loss = 1.1988e-01, PNorm = 120.2737, GNorm = 1.2623, lr_0 = 8.3616e-04
Loss = 1.2137e-01, PNorm = 120.3545, GNorm = 0.8150, lr_0 = 8.3559e-04
Loss = 1.3545e-01, PNorm = 120.4379, GNorm = 1.0655, lr_0 = 8.3502e-04
Loss = 1.0718e-01, PNorm = 120.5297, GNorm = 0.4426, lr_0 = 8.3445e-04
Loss = 1.0503e-01, PNorm = 120.6147, GNorm = 0.6256, lr_0 = 8.3388e-04
Loss = 1.5428e-01, PNorm = 120.7096, GNorm = 0.9116, lr_0 = 8.3330e-04
Loss = 1.2691e-01, PNorm = 120.7971, GNorm = 0.7139, lr_0 = 8.3273e-04
Loss = 1.1643e-01, PNorm = 120.8851, GNorm = 0.6057, lr_0 = 8.3216e-04
Loss = 1.0244e-01, PNorm = 120.9692, GNorm = 0.6163, lr_0 = 8.3159e-04
Loss = 1.1848e-01, PNorm = 121.0517, GNorm = 0.8098, lr_0 = 8.3102e-04
Loss = 1.1849e-01, PNorm = 121.1374, GNorm = 0.5806, lr_0 = 8.3045e-04
Loss = 1.3474e-01, PNorm = 121.2223, GNorm = 0.9032, lr_0 = 8.2988e-04
Loss = 1.1795e-01, PNorm = 121.3119, GNorm = 0.5585, lr_0 = 8.2932e-04
Loss = 1.3606e-01, PNorm = 121.4017, GNorm = 0.9553, lr_0 = 8.2875e-04
Loss = 1.3300e-01, PNorm = 121.4912, GNorm = 0.6662, lr_0 = 8.2818e-04
Loss = 1.0660e-01, PNorm = 121.5828, GNorm = 0.5215, lr_0 = 8.2761e-04
Loss = 1.1187e-01, PNorm = 121.6690, GNorm = 0.9428, lr_0 = 8.2705e-04
Loss = 1.4197e-01, PNorm = 121.7489, GNorm = 1.0997, lr_0 = 8.2648e-04
Loss = 1.1039e-01, PNorm = 121.8371, GNorm = 0.6294, lr_0 = 8.2591e-04
Loss = 1.2752e-01, PNorm = 121.9267, GNorm = 0.7562, lr_0 = 8.2535e-04
Loss = 1.1349e-01, PNorm = 122.0284, GNorm = 0.5822, lr_0 = 8.2478e-04
Loss = 1.1377e-01, PNorm = 122.1171, GNorm = 1.1037, lr_0 = 8.2422e-04
Loss = 1.1172e-01, PNorm = 122.2066, GNorm = 0.6250, lr_0 = 8.2365e-04
Loss = 1.2358e-01, PNorm = 122.2979, GNorm = 0.9962, lr_0 = 8.2309e-04
Loss = 1.3545e-01, PNorm = 122.3891, GNorm = 0.8168, lr_0 = 8.2252e-04
Loss = 1.1978e-01, PNorm = 122.4777, GNorm = 0.6640, lr_0 = 8.2196e-04
Loss = 1.1984e-01, PNorm = 122.5710, GNorm = 0.5424, lr_0 = 8.2140e-04
Loss = 1.3398e-01, PNorm = 122.6611, GNorm = 0.7824, lr_0 = 8.2084e-04
Loss = 1.3768e-01, PNorm = 122.7545, GNorm = 0.7521, lr_0 = 8.2027e-04
Loss = 1.4658e-01, PNorm = 122.8667, GNorm = 1.0225, lr_0 = 8.1971e-04
Loss = 1.1491e-01, PNorm = 122.9477, GNorm = 0.5195, lr_0 = 8.1915e-04
Loss = 1.4567e-01, PNorm = 123.0544, GNorm = 1.1084, lr_0 = 8.1859e-04
Loss = 1.1905e-01, PNorm = 123.1514, GNorm = 0.7755, lr_0 = 8.1803e-04
Loss = 1.3009e-01, PNorm = 123.2480, GNorm = 1.2269, lr_0 = 8.1747e-04
Loss = 1.6438e-01, PNorm = 123.3521, GNorm = 1.3719, lr_0 = 8.1691e-04
Loss = 1.4059e-01, PNorm = 123.4445, GNorm = 0.8663, lr_0 = 8.1635e-04
Loss = 1.0562e-01, PNorm = 123.5483, GNorm = 0.4434, lr_0 = 8.1579e-04
Loss = 1.2587e-01, PNorm = 123.6399, GNorm = 1.3077, lr_0 = 8.1523e-04
Loss = 1.1439e-01, PNorm = 123.7353, GNorm = 0.9043, lr_0 = 8.1467e-04
Loss = 1.2456e-01, PNorm = 123.8224, GNorm = 0.6088, lr_0 = 8.1411e-04
Loss = 1.3667e-01, PNorm = 123.9139, GNorm = 0.8063, lr_0 = 8.1355e-04
Loss = 1.4586e-01, PNorm = 124.0048, GNorm = 0.8236, lr_0 = 8.1300e-04
Loss = 1.4957e-01, PNorm = 124.0959, GNorm = 0.8885, lr_0 = 8.1244e-04
Loss = 1.2103e-01, PNorm = 124.2003, GNorm = 0.8027, lr_0 = 8.1188e-04
Loss = 1.2564e-01, PNorm = 124.2941, GNorm = 0.5075, lr_0 = 8.1133e-04
Loss = 1.3420e-01, PNorm = 124.3989, GNorm = 0.6234, lr_0 = 8.1077e-04
Loss = 1.3921e-01, PNorm = 124.4963, GNorm = 0.7577, lr_0 = 8.1022e-04
Loss = 1.2137e-01, PNorm = 124.5991, GNorm = 0.4447, lr_0 = 8.0966e-04
Loss = 1.2394e-01, PNorm = 124.7072, GNorm = 0.5690, lr_0 = 8.0911e-04
Loss = 1.2726e-01, PNorm = 124.8074, GNorm = 0.8510, lr_0 = 8.0855e-04
Loss = 1.2494e-01, PNorm = 124.9021, GNorm = 0.5781, lr_0 = 8.0800e-04
Loss = 1.3621e-01, PNorm = 124.9986, GNorm = 1.2694, lr_0 = 8.0745e-04
Loss = 1.5486e-01, PNorm = 125.0951, GNorm = 0.9526, lr_0 = 8.0689e-04
Loss = 1.5460e-01, PNorm = 125.1886, GNorm = 0.8572, lr_0 = 8.0634e-04
Loss = 1.3154e-01, PNorm = 125.2842, GNorm = 0.5749, lr_0 = 8.0579e-04
Loss = 1.3660e-01, PNorm = 125.3785, GNorm = 0.7597, lr_0 = 8.0523e-04
Loss = 1.2245e-01, PNorm = 125.4882, GNorm = 0.7777, lr_0 = 8.0468e-04
Loss = 1.4349e-01, PNorm = 125.5860, GNorm = 1.2813, lr_0 = 8.0413e-04
Loss = 1.2614e-01, PNorm = 125.6837, GNorm = 0.8922, lr_0 = 8.0358e-04
Loss = 1.3088e-01, PNorm = 125.7829, GNorm = 0.8368, lr_0 = 8.0303e-04
Loss = 1.2305e-01, PNorm = 125.8760, GNorm = 0.7543, lr_0 = 8.0248e-04
Loss = 1.5000e-01, PNorm = 125.9750, GNorm = 0.8183, lr_0 = 8.0193e-04
Loss = 1.4245e-01, PNorm = 126.0703, GNorm = 0.6039, lr_0 = 8.0138e-04
Loss = 1.3426e-01, PNorm = 126.1715, GNorm = 0.6170, lr_0 = 8.0083e-04
Loss = 1.3714e-01, PNorm = 126.2704, GNorm = 0.8185, lr_0 = 8.0028e-04
Loss = 1.2858e-01, PNorm = 126.3633, GNorm = 0.5830, lr_0 = 7.9974e-04
Loss = 1.3380e-01, PNorm = 126.4543, GNorm = 1.0010, lr_0 = 7.9919e-04
Loss = 1.1970e-01, PNorm = 126.5458, GNorm = 0.7928, lr_0 = 7.9864e-04
Loss = 1.6198e-01, PNorm = 126.6371, GNorm = 0.6588, lr_0 = 7.9809e-04
Loss = 1.3493e-01, PNorm = 126.7427, GNorm = 0.6786, lr_0 = 7.9755e-04
Loss = 1.2847e-01, PNorm = 126.8473, GNorm = 0.8015, lr_0 = 7.9700e-04
Loss = 1.2298e-01, PNorm = 126.9434, GNorm = 0.9637, lr_0 = 7.9645e-04
Loss = 1.2030e-01, PNorm = 127.0284, GNorm = 0.4971, lr_0 = 7.9591e-04
Loss = 1.4377e-01, PNorm = 127.1263, GNorm = 1.5475, lr_0 = 7.9536e-04
Loss = 1.3586e-01, PNorm = 127.2254, GNorm = 0.6086, lr_0 = 7.9482e-04
Loss = 1.4205e-01, PNorm = 127.3234, GNorm = 0.8387, lr_0 = 7.9427e-04
Loss = 1.4994e-01, PNorm = 127.4281, GNorm = 0.6287, lr_0 = 7.9373e-04
Loss = 1.4544e-01, PNorm = 127.5160, GNorm = 0.7381, lr_0 = 7.9319e-04
Loss = 1.1689e-01, PNorm = 127.6133, GNorm = 0.8469, lr_0 = 7.9264e-04
Loss = 1.4767e-01, PNorm = 127.7099, GNorm = 0.7267, lr_0 = 7.9210e-04
Loss = 1.4446e-01, PNorm = 127.8079, GNorm = 1.0990, lr_0 = 7.9156e-04
Loss = 1.2924e-01, PNorm = 127.9080, GNorm = 1.1434, lr_0 = 7.9101e-04
Loss = 1.4715e-01, PNorm = 128.0160, GNorm = 0.6150, lr_0 = 7.9047e-04
Loss = 1.4070e-01, PNorm = 128.1287, GNorm = 0.7856, lr_0 = 7.8993e-04
Loss = 1.4368e-01, PNorm = 128.2319, GNorm = 0.6930, lr_0 = 7.8939e-04
Loss = 1.4642e-01, PNorm = 128.3443, GNorm = 0.5879, lr_0 = 7.8885e-04
Loss = 1.2854e-01, PNorm = 128.4463, GNorm = 0.6463, lr_0 = 7.8831e-04
Loss = 1.6220e-01, PNorm = 128.5486, GNorm = 0.5842, lr_0 = 7.8777e-04
Loss = 1.1891e-01, PNorm = 128.6612, GNorm = 1.3734, lr_0 = 7.8723e-04
Loss = 1.4728e-01, PNorm = 128.7553, GNorm = 1.1316, lr_0 = 7.8669e-04
Loss = 1.4689e-01, PNorm = 128.8674, GNorm = 0.9665, lr_0 = 7.8615e-04
Loss = 1.6643e-01, PNorm = 128.9784, GNorm = 0.9102, lr_0 = 7.8561e-04
Loss = 1.3509e-01, PNorm = 129.0920, GNorm = 0.7441, lr_0 = 7.8507e-04
Loss = 1.3893e-01, PNorm = 129.2001, GNorm = 1.4243, lr_0 = 7.8454e-04
Loss = 1.5988e-01, PNorm = 129.2996, GNorm = 1.0605, lr_0 = 7.8400e-04
Loss = 1.6857e-01, PNorm = 129.4059, GNorm = 0.8440, lr_0 = 7.8346e-04
Loss = 1.2856e-01, PNorm = 129.5168, GNorm = 0.6851, lr_0 = 7.8293e-04
Loss = 1.6474e-01, PNorm = 129.6333, GNorm = 0.7262, lr_0 = 7.8239e-04
Loss = 1.5205e-01, PNorm = 129.7416, GNorm = 0.6537, lr_0 = 7.8185e-04
Loss = 1.5238e-01, PNorm = 129.8499, GNorm = 0.8264, lr_0 = 7.8132e-04
Validation mae = 0.124989
Epoch 5
Loss = 8.8916e-02, PNorm = 129.9327, GNorm = 1.7588, lr_0 = 7.8078e-04
Loss = 8.6993e-02, PNorm = 130.0133, GNorm = 0.6110, lr_0 = 7.8025e-04
Loss = 8.3358e-02, PNorm = 130.0791, GNorm = 0.5723, lr_0 = 7.7971e-04
Loss = 8.1184e-02, PNorm = 130.1400, GNorm = 0.6277, lr_0 = 7.7918e-04
Loss = 9.0840e-02, PNorm = 130.1983, GNorm = 0.5280, lr_0 = 7.7864e-04
Loss = 7.5032e-02, PNorm = 130.2641, GNorm = 0.5296, lr_0 = 7.7811e-04
Loss = 9.4353e-02, PNorm = 130.3246, GNorm = 0.4994, lr_0 = 7.7758e-04
Loss = 8.2467e-02, PNorm = 130.3947, GNorm = 0.4198, lr_0 = 7.7705e-04
Loss = 7.6297e-02, PNorm = 130.4494, GNorm = 0.4527, lr_0 = 7.7651e-04
Loss = 7.7813e-02, PNorm = 130.5064, GNorm = 1.0585, lr_0 = 7.7598e-04
Loss = 7.6923e-02, PNorm = 130.5670, GNorm = 0.9090, lr_0 = 7.7545e-04
Loss = 7.3420e-02, PNorm = 130.6244, GNorm = 0.5156, lr_0 = 7.7492e-04
Loss = 6.9225e-02, PNorm = 130.6795, GNorm = 0.6623, lr_0 = 7.7439e-04
Loss = 8.0535e-02, PNorm = 130.7373, GNorm = 0.8538, lr_0 = 7.7386e-04
Loss = 6.7269e-02, PNorm = 130.7878, GNorm = 0.4547, lr_0 = 7.7333e-04
Loss = 8.0027e-02, PNorm = 130.8394, GNorm = 0.5676, lr_0 = 7.7280e-04
Loss = 6.9936e-02, PNorm = 130.8876, GNorm = 0.4176, lr_0 = 7.7227e-04
Loss = 8.3681e-02, PNorm = 130.9331, GNorm = 0.9492, lr_0 = 7.7174e-04
Loss = 8.1299e-02, PNorm = 130.9894, GNorm = 0.4617, lr_0 = 7.7121e-04
Loss = 8.8030e-02, PNorm = 131.0459, GNorm = 0.4434, lr_0 = 7.7068e-04
Loss = 7.9928e-02, PNorm = 131.1037, GNorm = 0.5228, lr_0 = 7.7015e-04
Loss = 8.0278e-02, PNorm = 131.1633, GNorm = 0.4798, lr_0 = 7.6963e-04
Loss = 8.9351e-02, PNorm = 131.2230, GNorm = 0.6807, lr_0 = 7.6910e-04
Loss = 8.7341e-02, PNorm = 131.2849, GNorm = 0.6769, lr_0 = 7.6857e-04
Loss = 7.1058e-02, PNorm = 131.3468, GNorm = 0.5254, lr_0 = 7.6805e-04
Loss = 8.1891e-02, PNorm = 131.4035, GNorm = 0.4436, lr_0 = 7.6752e-04
Loss = 7.5449e-02, PNorm = 131.4590, GNorm = 0.7345, lr_0 = 7.6699e-04
Loss = 8.0951e-02, PNorm = 131.5211, GNorm = 0.6447, lr_0 = 7.6647e-04
Loss = 8.1880e-02, PNorm = 131.5824, GNorm = 0.6440, lr_0 = 7.6594e-04
Loss = 8.7238e-02, PNorm = 131.6589, GNorm = 0.8328, lr_0 = 7.6542e-04
Loss = 9.1976e-02, PNorm = 131.7323, GNorm = 0.4883, lr_0 = 7.6489e-04
Loss = 8.7140e-02, PNorm = 131.8108, GNorm = 0.5252, lr_0 = 7.6437e-04
Loss = 8.1850e-02, PNorm = 131.8830, GNorm = 0.4586, lr_0 = 7.6385e-04
Loss = 8.9878e-02, PNorm = 131.9522, GNorm = 0.3784, lr_0 = 7.6332e-04
Loss = 1.0539e-01, PNorm = 132.0248, GNorm = 0.8402, lr_0 = 7.6280e-04
Loss = 7.5954e-02, PNorm = 132.0969, GNorm = 0.6810, lr_0 = 7.6228e-04
Loss = 1.0203e-01, PNorm = 132.1690, GNorm = 0.5657, lr_0 = 7.6176e-04
Loss = 8.4082e-02, PNorm = 132.2463, GNorm = 0.8824, lr_0 = 7.6123e-04
Loss = 9.6441e-02, PNorm = 132.3211, GNorm = 1.0588, lr_0 = 7.6071e-04
Loss = 8.5188e-02, PNorm = 132.3991, GNorm = 0.5190, lr_0 = 7.6019e-04
Loss = 8.0985e-02, PNorm = 132.4677, GNorm = 0.3696, lr_0 = 7.5967e-04
Loss = 6.8528e-02, PNorm = 132.5328, GNorm = 0.5905, lr_0 = 7.5915e-04
Loss = 6.9336e-02, PNorm = 132.5957, GNorm = 0.5666, lr_0 = 7.5863e-04
Loss = 7.4557e-02, PNorm = 132.6593, GNorm = 0.5402, lr_0 = 7.5811e-04
Loss = 1.0661e-01, PNorm = 132.7130, GNorm = 0.6111, lr_0 = 7.5759e-04
Loss = 8.4320e-02, PNorm = 132.7812, GNorm = 0.5997, lr_0 = 7.5707e-04
Loss = 8.5372e-02, PNorm = 132.8584, GNorm = 0.6894, lr_0 = 7.5655e-04
Loss = 7.3352e-02, PNorm = 132.9336, GNorm = 0.4625, lr_0 = 7.5603e-04
Loss = 1.0033e-01, PNorm = 133.0133, GNorm = 0.4292, lr_0 = 7.5552e-04
Loss = 9.1640e-02, PNorm = 133.0879, GNorm = 0.6427, lr_0 = 7.5500e-04
Loss = 9.4187e-02, PNorm = 133.1695, GNorm = 0.5262, lr_0 = 7.5448e-04
Loss = 8.8917e-02, PNorm = 133.2460, GNorm = 0.7768, lr_0 = 7.5397e-04
Loss = 8.0677e-02, PNorm = 133.3317, GNorm = 0.6541, lr_0 = 7.5345e-04
Loss = 9.7332e-02, PNorm = 133.4114, GNorm = 0.7009, lr_0 = 7.5293e-04
Loss = 9.0883e-02, PNorm = 133.4892, GNorm = 0.5624, lr_0 = 7.5242e-04
Loss = 8.0367e-02, PNorm = 133.5705, GNorm = 0.7752, lr_0 = 7.5190e-04
Loss = 7.0647e-02, PNorm = 133.6411, GNorm = 0.5173, lr_0 = 7.5139e-04
Loss = 7.7935e-02, PNorm = 133.7098, GNorm = 0.6841, lr_0 = 7.5087e-04
Loss = 8.9313e-02, PNorm = 133.7786, GNorm = 0.5495, lr_0 = 7.5036e-04
Loss = 7.9513e-02, PNorm = 133.8517, GNorm = 0.5522, lr_0 = 7.4984e-04
Loss = 1.0156e-01, PNorm = 133.9299, GNorm = 0.8501, lr_0 = 7.4933e-04
Loss = 8.2367e-02, PNorm = 134.0073, GNorm = 0.5749, lr_0 = 7.4882e-04
Loss = 8.0881e-02, PNorm = 134.0949, GNorm = 0.7815, lr_0 = 7.4830e-04
Loss = 7.4771e-02, PNorm = 134.1663, GNorm = 0.5296, lr_0 = 7.4779e-04
Loss = 7.4345e-02, PNorm = 134.2429, GNorm = 0.4541, lr_0 = 7.4728e-04
Loss = 1.0580e-01, PNorm = 134.3054, GNorm = 0.6783, lr_0 = 7.4677e-04
Loss = 8.3254e-02, PNorm = 134.3844, GNorm = 0.5573, lr_0 = 7.4625e-04
Loss = 9.1351e-02, PNorm = 134.4560, GNorm = 0.5032, lr_0 = 7.4574e-04
Loss = 7.8868e-02, PNorm = 134.5351, GNorm = 0.5299, lr_0 = 7.4523e-04
Loss = 1.2395e-01, PNorm = 134.6163, GNorm = 1.5851, lr_0 = 7.4472e-04
Loss = 8.5819e-02, PNorm = 134.6925, GNorm = 0.6593, lr_0 = 7.4421e-04
Loss = 1.1765e-01, PNorm = 134.7854, GNorm = 0.5794, lr_0 = 7.4370e-04
Loss = 8.0243e-02, PNorm = 134.8700, GNorm = 1.4254, lr_0 = 7.4319e-04
Loss = 9.0801e-02, PNorm = 134.9571, GNorm = 0.6539, lr_0 = 7.4268e-04
Loss = 9.7480e-02, PNorm = 135.0335, GNorm = 0.7479, lr_0 = 7.4217e-04
Loss = 7.2593e-02, PNorm = 135.1120, GNorm = 0.4577, lr_0 = 7.4167e-04
Loss = 8.3367e-02, PNorm = 135.1855, GNorm = 0.5284, lr_0 = 7.4116e-04
Loss = 1.1373e-01, PNorm = 135.2659, GNorm = 0.7280, lr_0 = 7.4065e-04
Loss = 9.9942e-02, PNorm = 135.3475, GNorm = 1.0072, lr_0 = 7.4014e-04
Loss = 8.8004e-02, PNorm = 135.4196, GNorm = 0.5440, lr_0 = 7.3964e-04
Loss = 1.0313e-01, PNorm = 135.4953, GNorm = 0.4637, lr_0 = 7.3913e-04
Loss = 9.8229e-02, PNorm = 135.5792, GNorm = 0.8419, lr_0 = 7.3862e-04
Loss = 1.0176e-01, PNorm = 135.6624, GNorm = 0.5797, lr_0 = 7.3812e-04
Loss = 7.9026e-02, PNorm = 135.7483, GNorm = 0.8767, lr_0 = 7.3761e-04
Loss = 7.4440e-02, PNorm = 135.8227, GNorm = 0.6830, lr_0 = 7.3711e-04
Loss = 9.4286e-02, PNorm = 135.8971, GNorm = 1.1597, lr_0 = 7.3660e-04
Loss = 7.7151e-02, PNorm = 135.9681, GNorm = 0.4659, lr_0 = 7.3610e-04
Loss = 1.0121e-01, PNorm = 136.0428, GNorm = 1.0613, lr_0 = 7.3559e-04
Loss = 9.4366e-02, PNorm = 136.1216, GNorm = 0.9381, lr_0 = 7.3509e-04
Loss = 8.9714e-02, PNorm = 136.1977, GNorm = 0.5203, lr_0 = 7.3458e-04
Loss = 8.9946e-02, PNorm = 136.2814, GNorm = 0.7171, lr_0 = 7.3408e-04
Loss = 8.5491e-02, PNorm = 136.3543, GNorm = 0.4570, lr_0 = 7.3358e-04
Loss = 9.6464e-02, PNorm = 136.4313, GNorm = 0.7814, lr_0 = 7.3308e-04
Loss = 1.1089e-01, PNorm = 136.5222, GNorm = 0.4628, lr_0 = 7.3257e-04
Loss = 1.0474e-01, PNorm = 136.6196, GNorm = 0.7588, lr_0 = 7.3207e-04
Loss = 1.0522e-01, PNorm = 136.7163, GNorm = 1.2045, lr_0 = 7.3157e-04
Loss = 9.3471e-02, PNorm = 136.8055, GNorm = 0.7134, lr_0 = 7.3107e-04
Loss = 8.8302e-02, PNorm = 136.8964, GNorm = 0.7474, lr_0 = 7.3057e-04
Loss = 9.4986e-02, PNorm = 136.9756, GNorm = 1.1181, lr_0 = 7.3007e-04
Loss = 9.0591e-02, PNorm = 137.0585, GNorm = 0.9731, lr_0 = 7.2957e-04
Loss = 1.0031e-01, PNorm = 137.1419, GNorm = 0.6832, lr_0 = 7.2907e-04
Loss = 1.0040e-01, PNorm = 137.2147, GNorm = 0.7366, lr_0 = 7.2857e-04
Loss = 1.0320e-01, PNorm = 137.3021, GNorm = 1.3746, lr_0 = 7.2807e-04
Loss = 9.9534e-02, PNorm = 137.3794, GNorm = 0.4709, lr_0 = 7.2757e-04
Loss = 1.0764e-01, PNorm = 137.4694, GNorm = 0.7777, lr_0 = 7.2707e-04
Loss = 1.0298e-01, PNorm = 137.5531, GNorm = 0.9118, lr_0 = 7.2657e-04
Loss = 1.0157e-01, PNorm = 137.6428, GNorm = 1.1436, lr_0 = 7.2608e-04
Loss = 1.0001e-01, PNorm = 137.7286, GNorm = 0.4336, lr_0 = 7.2558e-04
Loss = 9.3728e-02, PNorm = 137.8149, GNorm = 0.5562, lr_0 = 7.2508e-04
Loss = 9.4226e-02, PNorm = 137.8984, GNorm = 0.7482, lr_0 = 7.2458e-04
Loss = 1.1068e-01, PNorm = 137.9927, GNorm = 1.0695, lr_0 = 7.2409e-04
Loss = 1.1178e-01, PNorm = 138.0799, GNorm = 0.6175, lr_0 = 7.2359e-04
Loss = 9.3505e-02, PNorm = 138.1707, GNorm = 0.8725, lr_0 = 7.2310e-04
Loss = 1.0546e-01, PNorm = 138.2588, GNorm = 1.5829, lr_0 = 7.2260e-04
Loss = 1.1196e-01, PNorm = 138.3504, GNorm = 0.7274, lr_0 = 7.2211e-04
Loss = 9.5557e-02, PNorm = 138.4443, GNorm = 0.7151, lr_0 = 7.2161e-04
Loss = 9.4363e-02, PNorm = 138.5326, GNorm = 0.8443, lr_0 = 7.2112e-04
Loss = 9.2538e-02, PNorm = 138.6213, GNorm = 0.8266, lr_0 = 7.2062e-04
Loss = 9.2813e-02, PNorm = 138.7112, GNorm = 0.7778, lr_0 = 7.2013e-04
Loss = 9.2609e-02, PNorm = 138.8037, GNorm = 0.6340, lr_0 = 7.1964e-04
Validation mae = 0.125631
Epoch 6
Loss = 6.0405e-02, PNorm = 138.8804, GNorm = 0.7617, lr_0 = 7.1914e-04
Loss = 7.1575e-02, PNorm = 138.9417, GNorm = 0.5621, lr_0 = 7.1865e-04
Loss = 7.2740e-02, PNorm = 139.0031, GNorm = 0.6425, lr_0 = 7.1816e-04
Loss = 7.0743e-02, PNorm = 139.0569, GNorm = 0.4137, lr_0 = 7.1767e-04
Loss = 7.5345e-02, PNorm = 139.1088, GNorm = 0.4260, lr_0 = 7.1717e-04
Loss = 6.3271e-02, PNorm = 139.1598, GNorm = 0.5476, lr_0 = 7.1668e-04
Loss = 6.9843e-02, PNorm = 139.2089, GNorm = 0.7319, lr_0 = 7.1619e-04
Loss = 6.3255e-02, PNorm = 139.2582, GNorm = 0.6873, lr_0 = 7.1570e-04
Loss = 5.6854e-02, PNorm = 139.3054, GNorm = 0.5901, lr_0 = 7.1521e-04
Loss = 6.1433e-02, PNorm = 139.3542, GNorm = 0.4149, lr_0 = 7.1472e-04
Loss = 5.1829e-02, PNorm = 139.4014, GNorm = 0.3589, lr_0 = 7.1423e-04
Loss = 5.4927e-02, PNorm = 139.4511, GNorm = 0.4774, lr_0 = 7.1374e-04
Loss = 6.1185e-02, PNorm = 139.4948, GNorm = 0.4009, lr_0 = 7.1325e-04
Loss = 6.5712e-02, PNorm = 139.5435, GNorm = 0.7174, lr_0 = 7.1277e-04
Loss = 6.6959e-02, PNorm = 139.6052, GNorm = 0.5696, lr_0 = 7.1228e-04
Loss = 5.5081e-02, PNorm = 139.6596, GNorm = 0.5280, lr_0 = 7.1179e-04
Loss = 6.8162e-02, PNorm = 139.7074, GNorm = 0.5409, lr_0 = 7.1130e-04
Loss = 5.7691e-02, PNorm = 139.7564, GNorm = 0.4296, lr_0 = 7.1081e-04
Loss = 5.9319e-02, PNorm = 139.8094, GNorm = 0.5648, lr_0 = 7.1033e-04
Loss = 6.5086e-02, PNorm = 139.8611, GNorm = 0.5335, lr_0 = 7.0984e-04
Loss = 5.2012e-02, PNorm = 139.9109, GNorm = 0.3996, lr_0 = 7.0935e-04
Loss = 6.1243e-02, PNorm = 139.9577, GNorm = 0.6120, lr_0 = 7.0887e-04
Loss = 5.9637e-02, PNorm = 140.0071, GNorm = 0.4666, lr_0 = 7.0838e-04
Loss = 5.5425e-02, PNorm = 140.0564, GNorm = 0.3052, lr_0 = 7.0790e-04
Loss = 5.4437e-02, PNorm = 140.1043, GNorm = 0.5943, lr_0 = 7.0741e-04
Loss = 7.1283e-02, PNorm = 140.1514, GNorm = 0.7132, lr_0 = 7.0693e-04
Loss = 8.5483e-02, PNorm = 140.2168, GNorm = 0.6987, lr_0 = 7.0644e-04
Loss = 5.8454e-02, PNorm = 140.2740, GNorm = 0.5044, lr_0 = 7.0596e-04
Loss = 6.4871e-02, PNorm = 140.3322, GNorm = 0.3893, lr_0 = 7.0548e-04
Loss = 5.3378e-02, PNorm = 140.3844, GNorm = 0.6427, lr_0 = 7.0499e-04
Loss = 6.4574e-02, PNorm = 140.4324, GNorm = 0.5126, lr_0 = 7.0451e-04
Loss = 5.2989e-02, PNorm = 140.4862, GNorm = 0.6217, lr_0 = 7.0403e-04
Loss = 6.2252e-02, PNorm = 140.5450, GNorm = 0.4978, lr_0 = 7.0354e-04
Loss = 4.8871e-02, PNorm = 140.6002, GNorm = 0.5235, lr_0 = 7.0306e-04
Loss = 6.5911e-02, PNorm = 140.6467, GNorm = 0.4904, lr_0 = 7.0258e-04
Loss = 5.4984e-02, PNorm = 140.7057, GNorm = 0.6511, lr_0 = 7.0210e-04
Loss = 7.3400e-02, PNorm = 140.7603, GNorm = 0.4311, lr_0 = 7.0162e-04
Loss = 6.2592e-02, PNorm = 140.8157, GNorm = 0.4224, lr_0 = 7.0114e-04
Loss = 6.0442e-02, PNorm = 140.8822, GNorm = 0.5146, lr_0 = 7.0066e-04
Loss = 5.4412e-02, PNorm = 140.9398, GNorm = 0.6575, lr_0 = 7.0018e-04
Loss = 5.8385e-02, PNorm = 140.9922, GNorm = 0.5461, lr_0 = 6.9970e-04
Loss = 6.9948e-02, PNorm = 141.0548, GNorm = 0.4308, lr_0 = 6.9922e-04
Loss = 6.3822e-02, PNorm = 141.1157, GNorm = 0.6321, lr_0 = 6.9874e-04
Loss = 7.1961e-02, PNorm = 141.1711, GNorm = 0.3526, lr_0 = 6.9826e-04
Loss = 4.2979e-02, PNorm = 141.2304, GNorm = 0.3822, lr_0 = 6.9778e-04
Loss = 5.4381e-02, PNorm = 141.2784, GNorm = 0.5189, lr_0 = 6.9730e-04
Loss = 6.1821e-02, PNorm = 141.3300, GNorm = 0.7866, lr_0 = 6.9683e-04
Loss = 5.5384e-02, PNorm = 141.3859, GNorm = 0.6806, lr_0 = 6.9635e-04
Loss = 5.7213e-02, PNorm = 141.4388, GNorm = 1.0885, lr_0 = 6.9587e-04
Loss = 5.3006e-02, PNorm = 141.4963, GNorm = 0.4192, lr_0 = 6.9540e-04
Loss = 4.8829e-02, PNorm = 141.5473, GNorm = 0.6339, lr_0 = 6.9492e-04
Loss = 6.4095e-02, PNorm = 141.5988, GNorm = 0.6796, lr_0 = 6.9444e-04
Loss = 6.3602e-02, PNorm = 141.6553, GNorm = 0.6476, lr_0 = 6.9397e-04
Loss = 6.0129e-02, PNorm = 141.7211, GNorm = 0.4230, lr_0 = 6.9349e-04
Loss = 7.1167e-02, PNorm = 141.7763, GNorm = 0.5395, lr_0 = 6.9302e-04
Loss = 6.7867e-02, PNorm = 141.8396, GNorm = 0.4741, lr_0 = 6.9254e-04
Loss = 6.9696e-02, PNorm = 141.9060, GNorm = 0.5878, lr_0 = 6.9207e-04
Loss = 4.6864e-02, PNorm = 141.9737, GNorm = 0.6311, lr_0 = 6.9159e-04
Loss = 6.5103e-02, PNorm = 142.0358, GNorm = 0.5825, lr_0 = 6.9112e-04
Loss = 5.9521e-02, PNorm = 142.0945, GNorm = 0.6352, lr_0 = 6.9065e-04
Loss = 7.1109e-02, PNorm = 142.1596, GNorm = 0.7637, lr_0 = 6.9017e-04
Loss = 5.9708e-02, PNorm = 142.2172, GNorm = 0.4172, lr_0 = 6.8970e-04
Loss = 7.4392e-02, PNorm = 142.2682, GNorm = 0.4557, lr_0 = 6.8923e-04
Loss = 6.5009e-02, PNorm = 142.3364, GNorm = 0.5053, lr_0 = 6.8876e-04
Loss = 7.2184e-02, PNorm = 142.4050, GNorm = 0.8587, lr_0 = 6.8828e-04
Loss = 6.9255e-02, PNorm = 142.4711, GNorm = 0.4701, lr_0 = 6.8781e-04
Loss = 6.2249e-02, PNorm = 142.5320, GNorm = 0.6320, lr_0 = 6.8734e-04
Loss = 6.4757e-02, PNorm = 142.5935, GNorm = 0.5555, lr_0 = 6.8687e-04
Loss = 6.3927e-02, PNorm = 142.6564, GNorm = 0.4323, lr_0 = 6.8640e-04
Loss = 5.9499e-02, PNorm = 142.7152, GNorm = 0.5463, lr_0 = 6.8593e-04
Loss = 6.9123e-02, PNorm = 142.7710, GNorm = 0.8356, lr_0 = 6.8546e-04
Loss = 7.3677e-02, PNorm = 142.8389, GNorm = 0.3720, lr_0 = 6.8499e-04
Loss = 6.4573e-02, PNorm = 142.9009, GNorm = 0.6150, lr_0 = 6.8452e-04
Loss = 8.0634e-02, PNorm = 142.9647, GNorm = 0.4636, lr_0 = 6.8405e-04
Loss = 6.7340e-02, PNorm = 143.0363, GNorm = 0.7542, lr_0 = 6.8358e-04
Loss = 6.9977e-02, PNorm = 143.1023, GNorm = 0.6748, lr_0 = 6.8312e-04
Loss = 5.5776e-02, PNorm = 143.1676, GNorm = 0.4053, lr_0 = 6.8265e-04
Loss = 6.4944e-02, PNorm = 143.2301, GNorm = 0.6722, lr_0 = 6.8218e-04
Loss = 7.7326e-02, PNorm = 143.2951, GNorm = 0.9641, lr_0 = 6.8171e-04
Loss = 6.5713e-02, PNorm = 143.3725, GNorm = 0.4408, lr_0 = 6.8125e-04
Loss = 6.9429e-02, PNorm = 143.4417, GNorm = 0.5797, lr_0 = 6.8078e-04
Loss = 6.5601e-02, PNorm = 143.5156, GNorm = 0.4869, lr_0 = 6.8031e-04
Loss = 6.3367e-02, PNorm = 143.5917, GNorm = 0.5148, lr_0 = 6.7985e-04
Loss = 6.2682e-02, PNorm = 143.6571, GNorm = 0.4728, lr_0 = 6.7938e-04
Loss = 7.6982e-02, PNorm = 143.7249, GNorm = 0.5930, lr_0 = 6.7892e-04
Loss = 7.1255e-02, PNorm = 143.7906, GNorm = 0.6330, lr_0 = 6.7845e-04
Loss = 5.6846e-02, PNorm = 143.8626, GNorm = 0.5783, lr_0 = 6.7799e-04
Loss = 6.2512e-02, PNorm = 143.9324, GNorm = 0.6220, lr_0 = 6.7752e-04
Loss = 6.2071e-02, PNorm = 144.0047, GNorm = 0.5226, lr_0 = 6.7706e-04
Loss = 7.0641e-02, PNorm = 144.0741, GNorm = 0.3993, lr_0 = 6.7659e-04
Loss = 6.7531e-02, PNorm = 144.1492, GNorm = 0.6232, lr_0 = 6.7613e-04
Loss = 7.4587e-02, PNorm = 144.2244, GNorm = 1.2266, lr_0 = 6.7567e-04
Loss = 7.1799e-02, PNorm = 144.3031, GNorm = 0.4587, lr_0 = 6.7520e-04
Loss = 5.7293e-02, PNorm = 144.3815, GNorm = 0.6580, lr_0 = 6.7474e-04
Loss = 6.3591e-02, PNorm = 144.4550, GNorm = 0.3745, lr_0 = 6.7428e-04
Loss = 7.5864e-02, PNorm = 144.5312, GNorm = 0.7244, lr_0 = 6.7382e-04
Loss = 7.2715e-02, PNorm = 144.5959, GNorm = 0.6045, lr_0 = 6.7335e-04
Loss = 8.3587e-02, PNorm = 144.6652, GNorm = 0.4768, lr_0 = 6.7289e-04
Loss = 6.6112e-02, PNorm = 144.7344, GNorm = 0.4247, lr_0 = 6.7243e-04
Loss = 6.1760e-02, PNorm = 144.7988, GNorm = 0.4986, lr_0 = 6.7197e-04
Loss = 7.1185e-02, PNorm = 144.8705, GNorm = 0.6281, lr_0 = 6.7151e-04
Loss = 7.2053e-02, PNorm = 144.9431, GNorm = 1.0009, lr_0 = 6.7105e-04
Loss = 6.4039e-02, PNorm = 145.0078, GNorm = 0.7953, lr_0 = 6.7059e-04
Loss = 8.7945e-02, PNorm = 145.0828, GNorm = 0.9756, lr_0 = 6.7013e-04
Loss = 6.2145e-02, PNorm = 145.1616, GNorm = 0.3976, lr_0 = 6.6967e-04
Loss = 6.3809e-02, PNorm = 145.2372, GNorm = 0.7374, lr_0 = 6.6921e-04
Loss = 5.4922e-02, PNorm = 145.3048, GNorm = 0.5898, lr_0 = 6.6876e-04
Loss = 7.3631e-02, PNorm = 145.3622, GNorm = 0.8098, lr_0 = 6.6830e-04
Loss = 6.3869e-02, PNorm = 145.4252, GNorm = 0.5227, lr_0 = 6.6784e-04
Loss = 6.3447e-02, PNorm = 145.4874, GNorm = 0.5157, lr_0 = 6.6738e-04
Loss = 8.5583e-02, PNorm = 145.5532, GNorm = 0.4963, lr_0 = 6.6693e-04
Loss = 6.6517e-02, PNorm = 145.6199, GNorm = 0.4870, lr_0 = 6.6647e-04
Loss = 6.9728e-02, PNorm = 145.6900, GNorm = 0.6375, lr_0 = 6.6601e-04
Loss = 6.9145e-02, PNorm = 145.7554, GNorm = 0.7641, lr_0 = 6.6556e-04
Loss = 9.2847e-02, PNorm = 145.8273, GNorm = 0.3941, lr_0 = 6.6510e-04
Loss = 7.0939e-02, PNorm = 145.8995, GNorm = 0.6391, lr_0 = 6.6464e-04
Loss = 8.1804e-02, PNorm = 145.9715, GNorm = 0.6697, lr_0 = 6.6419e-04
Loss = 7.1707e-02, PNorm = 146.0492, GNorm = 0.3549, lr_0 = 6.6373e-04
Loss = 7.1083e-02, PNorm = 146.1220, GNorm = 0.9301, lr_0 = 6.6328e-04
Loss = 7.0440e-02, PNorm = 146.1961, GNorm = 0.6901, lr_0 = 6.6282e-04
Validation mae = 0.124204
Epoch 7
Loss = 6.1053e-02, PNorm = 146.2565, GNorm = 0.5559, lr_0 = 6.6237e-04
Loss = 5.7682e-02, PNorm = 146.3102, GNorm = 0.5042, lr_0 = 6.6192e-04
Loss = 5.4446e-02, PNorm = 146.3652, GNorm = 0.6375, lr_0 = 6.6146e-04
Loss = 7.6940e-02, PNorm = 146.4126, GNorm = 0.3235, lr_0 = 6.6101e-04
Loss = 5.1414e-02, PNorm = 146.4625, GNorm = 0.3679, lr_0 = 6.6056e-04
Loss = 4.8780e-02, PNorm = 146.5012, GNorm = 0.5404, lr_0 = 6.6011e-04
Loss = 5.0376e-02, PNorm = 146.5446, GNorm = 0.6065, lr_0 = 6.5965e-04
Loss = 4.8327e-02, PNorm = 146.5860, GNorm = 0.4097, lr_0 = 6.5920e-04
Loss = 4.6843e-02, PNorm = 146.6321, GNorm = 0.4436, lr_0 = 6.5875e-04
Loss = 4.3733e-02, PNorm = 146.6776, GNorm = 0.3452, lr_0 = 6.5830e-04
Loss = 5.3217e-02, PNorm = 146.7207, GNorm = 0.5273, lr_0 = 6.5785e-04
Loss = 5.1479e-02, PNorm = 146.7638, GNorm = 0.5468, lr_0 = 6.5740e-04
Loss = 4.9790e-02, PNorm = 146.8103, GNorm = 0.3670, lr_0 = 6.5695e-04
Loss = 5.2393e-02, PNorm = 146.8524, GNorm = 0.3414, lr_0 = 6.5650e-04
Loss = 4.0667e-02, PNorm = 146.8946, GNorm = 0.5132, lr_0 = 6.5605e-04
Loss = 6.0624e-02, PNorm = 146.9390, GNorm = 0.4479, lr_0 = 6.5560e-04
Loss = 4.9186e-02, PNorm = 146.9838, GNorm = 0.5756, lr_0 = 6.5515e-04
Loss = 5.1863e-02, PNorm = 147.0310, GNorm = 0.2858, lr_0 = 6.5470e-04
Loss = 5.3652e-02, PNorm = 147.0771, GNorm = 0.4920, lr_0 = 6.5425e-04
Loss = 5.0614e-02, PNorm = 147.1237, GNorm = 0.7099, lr_0 = 6.5380e-04
Loss = 4.1677e-02, PNorm = 147.1698, GNorm = 0.3993, lr_0 = 6.5335e-04
Loss = 4.7303e-02, PNorm = 147.2142, GNorm = 0.5090, lr_0 = 6.5291e-04
Loss = 4.3947e-02, PNorm = 147.2591, GNorm = 0.5857, lr_0 = 6.5246e-04
Loss = 4.9369e-02, PNorm = 147.3066, GNorm = 0.3910, lr_0 = 6.5201e-04
Loss = 3.9417e-02, PNorm = 147.3544, GNorm = 0.5893, lr_0 = 6.5157e-04
Loss = 5.5143e-02, PNorm = 147.3902, GNorm = 0.6998, lr_0 = 6.5112e-04
Loss = 4.5487e-02, PNorm = 147.4273, GNorm = 0.3133, lr_0 = 6.5067e-04
Loss = 4.7596e-02, PNorm = 147.4667, GNorm = 0.3941, lr_0 = 6.5023e-04
Loss = 5.0490e-02, PNorm = 147.5116, GNorm = 0.3158, lr_0 = 6.4978e-04
Loss = 4.5346e-02, PNorm = 147.5544, GNorm = 0.7858, lr_0 = 6.4934e-04
Loss = 4.9649e-02, PNorm = 147.6046, GNorm = 0.5066, lr_0 = 6.4889e-04
Loss = 4.0607e-02, PNorm = 147.6484, GNorm = 0.4571, lr_0 = 6.4845e-04
Loss = 5.4361e-02, PNorm = 147.6905, GNorm = 0.5833, lr_0 = 6.4800e-04
Loss = 4.2634e-02, PNorm = 147.7374, GNorm = 0.3741, lr_0 = 6.4756e-04
Loss = 4.4000e-02, PNorm = 147.7831, GNorm = 0.4043, lr_0 = 6.4712e-04
Loss = 4.3441e-02, PNorm = 147.8298, GNorm = 0.5974, lr_0 = 6.4667e-04
Loss = 4.7862e-02, PNorm = 147.8769, GNorm = 0.5137, lr_0 = 6.4623e-04
Loss = 3.7568e-02, PNorm = 147.9230, GNorm = 0.2810, lr_0 = 6.4579e-04
Loss = 4.2287e-02, PNorm = 147.9653, GNorm = 0.3043, lr_0 = 6.4534e-04
Loss = 6.1844e-02, PNorm = 148.0070, GNorm = 0.7993, lr_0 = 6.4490e-04
Loss = 5.1269e-02, PNorm = 148.0508, GNorm = 0.3076, lr_0 = 6.4446e-04
Loss = 4.8254e-02, PNorm = 148.0960, GNorm = 0.7262, lr_0 = 6.4402e-04
Loss = 4.2518e-02, PNorm = 148.1391, GNorm = 0.3110, lr_0 = 6.4358e-04
Loss = 4.4697e-02, PNorm = 148.1880, GNorm = 0.5605, lr_0 = 6.4314e-04
Loss = 3.3050e-02, PNorm = 148.2290, GNorm = 0.4806, lr_0 = 6.4270e-04
Loss = 4.4536e-02, PNorm = 148.2705, GNorm = 0.5213, lr_0 = 6.4226e-04
Loss = 5.0448e-02, PNorm = 148.3183, GNorm = 0.8289, lr_0 = 6.4182e-04
Loss = 5.0287e-02, PNorm = 148.3642, GNorm = 0.4228, lr_0 = 6.4138e-04
Loss = 4.3333e-02, PNorm = 148.4109, GNorm = 0.4305, lr_0 = 6.4094e-04
Loss = 4.2904e-02, PNorm = 148.4581, GNorm = 0.5263, lr_0 = 6.4050e-04
Loss = 4.1891e-02, PNorm = 148.5051, GNorm = 0.3939, lr_0 = 6.4006e-04
Loss = 3.9891e-02, PNorm = 148.5504, GNorm = 0.3031, lr_0 = 6.3962e-04
Loss = 5.4601e-02, PNorm = 148.5987, GNorm = 0.5184, lr_0 = 6.3918e-04
Loss = 4.4471e-02, PNorm = 148.6541, GNorm = 0.5535, lr_0 = 6.3874e-04
Loss = 4.6534e-02, PNorm = 148.7128, GNorm = 0.5050, lr_0 = 6.3831e-04
Loss = 4.8657e-02, PNorm = 148.7694, GNorm = 0.4199, lr_0 = 6.3787e-04
Loss = 4.9263e-02, PNorm = 148.8301, GNorm = 0.5870, lr_0 = 6.3743e-04
Loss = 4.9921e-02, PNorm = 148.8838, GNorm = 0.3884, lr_0 = 6.3700e-04
Loss = 5.1003e-02, PNorm = 148.9396, GNorm = 0.4662, lr_0 = 6.3656e-04
Loss = 4.2436e-02, PNorm = 149.0003, GNorm = 0.4437, lr_0 = 6.3612e-04
Loss = 4.3720e-02, PNorm = 149.0539, GNorm = 0.2671, lr_0 = 6.3569e-04
Loss = 4.3471e-02, PNorm = 149.1076, GNorm = 0.3598, lr_0 = 6.3525e-04
Loss = 4.3267e-02, PNorm = 149.1604, GNorm = 0.5245, lr_0 = 6.3482e-04
Loss = 6.3649e-02, PNorm = 149.2214, GNorm = 0.6434, lr_0 = 6.3438e-04
Loss = 4.7315e-02, PNorm = 149.2799, GNorm = 1.2070, lr_0 = 6.3395e-04
Loss = 5.4549e-02, PNorm = 149.3313, GNorm = 0.4003, lr_0 = 6.3351e-04
Loss = 4.8943e-02, PNorm = 149.3915, GNorm = 0.8651, lr_0 = 6.3308e-04
Loss = 4.8943e-02, PNorm = 149.4422, GNorm = 0.3926, lr_0 = 6.3265e-04
Loss = 4.4804e-02, PNorm = 149.4951, GNorm = 0.7308, lr_0 = 6.3221e-04
Loss = 4.8618e-02, PNorm = 149.5503, GNorm = 0.5425, lr_0 = 6.3178e-04
Loss = 3.8911e-02, PNorm = 149.6037, GNorm = 0.3629, lr_0 = 6.3135e-04
Loss = 4.5652e-02, PNorm = 149.6489, GNorm = 0.4143, lr_0 = 6.3091e-04
Loss = 4.0540e-02, PNorm = 149.7004, GNorm = 0.4914, lr_0 = 6.3048e-04
Loss = 4.3607e-02, PNorm = 149.7555, GNorm = 0.4116, lr_0 = 6.3005e-04
Loss = 4.0832e-02, PNorm = 149.8065, GNorm = 0.3710, lr_0 = 6.2962e-04
Loss = 5.2143e-02, PNorm = 149.8551, GNorm = 0.8001, lr_0 = 6.2919e-04
Loss = 4.4316e-02, PNorm = 149.9083, GNorm = 0.4859, lr_0 = 6.2876e-04
Loss = 5.1031e-02, PNorm = 149.9544, GNorm = 0.4958, lr_0 = 6.2833e-04
Loss = 5.5183e-02, PNorm = 150.0072, GNorm = 1.4625, lr_0 = 6.2789e-04
Loss = 6.7221e-02, PNorm = 150.0640, GNorm = 0.4381, lr_0 = 6.2746e-04
Loss = 4.3593e-02, PNorm = 150.1249, GNorm = 0.4909, lr_0 = 6.2703e-04
Loss = 5.4692e-02, PNorm = 150.1757, GNorm = 0.4862, lr_0 = 6.2661e-04
Loss = 4.8993e-02, PNorm = 150.2317, GNorm = 0.3441, lr_0 = 6.2618e-04
Loss = 5.2007e-02, PNorm = 150.2915, GNorm = 0.5173, lr_0 = 6.2575e-04
Loss = 5.2180e-02, PNorm = 150.3451, GNorm = 0.9739, lr_0 = 6.2532e-04
Loss = 4.3392e-02, PNorm = 150.3989, GNorm = 0.3691, lr_0 = 6.2489e-04
Loss = 4.8461e-02, PNorm = 150.4543, GNorm = 0.5074, lr_0 = 6.2446e-04
Loss = 5.9828e-02, PNorm = 150.5100, GNorm = 0.4539, lr_0 = 6.2403e-04
Loss = 4.6281e-02, PNorm = 150.5732, GNorm = 0.6807, lr_0 = 6.2361e-04
Loss = 5.3827e-02, PNorm = 150.6338, GNorm = 0.3334, lr_0 = 6.2318e-04
Loss = 5.2808e-02, PNorm = 150.6898, GNorm = 0.2447, lr_0 = 6.2275e-04
Loss = 4.0800e-02, PNorm = 150.7455, GNorm = 0.2778, lr_0 = 6.2233e-04
Loss = 4.8428e-02, PNorm = 150.7932, GNorm = 0.3487, lr_0 = 6.2190e-04
Loss = 6.9761e-02, PNorm = 150.8557, GNorm = 0.6771, lr_0 = 6.2147e-04
Loss = 7.9635e-02, PNorm = 150.9152, GNorm = 0.2848, lr_0 = 6.2105e-04
Loss = 5.7266e-02, PNorm = 150.9787, GNorm = 0.6335, lr_0 = 6.2062e-04
Loss = 5.1197e-02, PNorm = 151.0426, GNorm = 0.9459, lr_0 = 6.2020e-04
Loss = 5.6057e-02, PNorm = 151.1045, GNorm = 0.6295, lr_0 = 6.1977e-04
Loss = 5.5573e-02, PNorm = 151.1616, GNorm = 0.4327, lr_0 = 6.1935e-04
Loss = 4.5401e-02, PNorm = 151.2216, GNorm = 0.3144, lr_0 = 6.1892e-04
Loss = 5.5074e-02, PNorm = 151.2846, GNorm = 0.3993, lr_0 = 6.1850e-04
Loss = 4.5593e-02, PNorm = 151.3411, GNorm = 0.4552, lr_0 = 6.1808e-04
Loss = 8.5171e-02, PNorm = 151.3981, GNorm = 0.4206, lr_0 = 6.1765e-04
Loss = 5.1304e-02, PNorm = 151.4574, GNorm = 0.6650, lr_0 = 6.1723e-04
Loss = 5.2619e-02, PNorm = 151.5204, GNorm = 0.9039, lr_0 = 6.1681e-04
Loss = 5.5682e-02, PNorm = 151.5805, GNorm = 0.6589, lr_0 = 6.1638e-04
Loss = 5.2214e-02, PNorm = 151.6466, GNorm = 0.6308, lr_0 = 6.1596e-04
Loss = 6.4773e-02, PNorm = 151.7092, GNorm = 0.6170, lr_0 = 6.1554e-04
Loss = 6.3284e-02, PNorm = 151.7707, GNorm = 0.6451, lr_0 = 6.1512e-04
Loss = 5.7707e-02, PNorm = 151.8333, GNorm = 0.6732, lr_0 = 6.1470e-04
Loss = 4.9608e-02, PNorm = 151.8906, GNorm = 0.3854, lr_0 = 6.1428e-04
Loss = 5.4129e-02, PNorm = 151.9544, GNorm = 0.4366, lr_0 = 6.1385e-04
Loss = 6.4038e-02, PNorm = 152.0097, GNorm = 0.3429, lr_0 = 6.1343e-04
Loss = 5.1642e-02, PNorm = 152.0704, GNorm = 0.4723, lr_0 = 6.1301e-04
Loss = 5.6593e-02, PNorm = 152.1245, GNorm = 0.6428, lr_0 = 6.1259e-04
Loss = 6.6746e-02, PNorm = 152.1823, GNorm = 0.4794, lr_0 = 6.1217e-04
Loss = 5.4747e-02, PNorm = 152.2482, GNorm = 0.4723, lr_0 = 6.1175e-04
Loss = 5.6698e-02, PNorm = 152.3088, GNorm = 0.4750, lr_0 = 6.1134e-04
Loss = 5.7553e-02, PNorm = 152.3688, GNorm = 0.6500, lr_0 = 6.1092e-04
Loss = 4.7710e-02, PNorm = 152.4270, GNorm = 0.4817, lr_0 = 6.1050e-04
Validation mae = 0.123495
Epoch 8
Loss = 5.2090e-02, PNorm = 152.4796, GNorm = 0.6262, lr_0 = 6.1008e-04
Loss = 4.6167e-02, PNorm = 152.5209, GNorm = 0.2662, lr_0 = 6.0966e-04
Loss = 4.4267e-02, PNorm = 152.5561, GNorm = 0.2879, lr_0 = 6.0924e-04
Loss = 4.7135e-02, PNorm = 152.5987, GNorm = 0.3486, lr_0 = 6.0883e-04
Loss = 3.4265e-02, PNorm = 152.6383, GNorm = 0.4157, lr_0 = 6.0841e-04
Loss = 4.7224e-02, PNorm = 152.6847, GNorm = 0.6884, lr_0 = 6.0799e-04
Loss = 3.8964e-02, PNorm = 152.7196, GNorm = 0.2930, lr_0 = 6.0758e-04
Loss = 4.3416e-02, PNorm = 152.7565, GNorm = 0.4605, lr_0 = 6.0716e-04
Loss = 4.2597e-02, PNorm = 152.7922, GNorm = 0.4190, lr_0 = 6.0674e-04
Loss = 4.7122e-02, PNorm = 152.8318, GNorm = 0.5376, lr_0 = 6.0633e-04
Loss = 4.0870e-02, PNorm = 152.8726, GNorm = 0.4443, lr_0 = 6.0591e-04
Loss = 3.3958e-02, PNorm = 152.9111, GNorm = 0.2733, lr_0 = 6.0550e-04
Loss = 4.6679e-02, PNorm = 152.9509, GNorm = 0.4776, lr_0 = 6.0508e-04
Loss = 3.8303e-02, PNorm = 152.9896, GNorm = 0.3966, lr_0 = 6.0467e-04
Loss = 4.9903e-02, PNorm = 153.0274, GNorm = 0.4909, lr_0 = 6.0425e-04
Loss = 3.6849e-02, PNorm = 153.0589, GNorm = 0.6270, lr_0 = 6.0384e-04
Loss = 3.6698e-02, PNorm = 153.0990, GNorm = 0.5416, lr_0 = 6.0343e-04
Loss = 2.9887e-02, PNorm = 153.1356, GNorm = 0.2816, lr_0 = 6.0301e-04
Loss = 4.6460e-02, PNorm = 153.1716, GNorm = 0.7742, lr_0 = 6.0260e-04
Loss = 4.5073e-02, PNorm = 153.2109, GNorm = 0.9538, lr_0 = 6.0219e-04
Loss = 5.0467e-02, PNorm = 153.2567, GNorm = 0.5893, lr_0 = 6.0178e-04
Loss = 4.0246e-02, PNorm = 153.2949, GNorm = 0.3534, lr_0 = 6.0136e-04
Loss = 3.9230e-02, PNorm = 153.3339, GNorm = 0.4816, lr_0 = 6.0095e-04
Loss = 3.5098e-02, PNorm = 153.3735, GNorm = 0.2648, lr_0 = 6.0054e-04
Loss = 5.7652e-02, PNorm = 153.4117, GNorm = 0.4974, lr_0 = 6.0013e-04
Loss = 4.3846e-02, PNorm = 153.4527, GNorm = 0.5699, lr_0 = 5.9972e-04
Loss = 4.0845e-02, PNorm = 153.4877, GNorm = 0.5046, lr_0 = 5.9931e-04
Loss = 3.7026e-02, PNorm = 153.5307, GNorm = 0.5504, lr_0 = 5.9890e-04
Loss = 4.4613e-02, PNorm = 153.5726, GNorm = 0.6139, lr_0 = 5.9849e-04
Loss = 3.9380e-02, PNorm = 153.6146, GNorm = 0.7409, lr_0 = 5.9808e-04
Loss = 3.6705e-02, PNorm = 153.6593, GNorm = 0.4294, lr_0 = 5.9767e-04
Loss = 3.3166e-02, PNorm = 153.7012, GNorm = 0.3821, lr_0 = 5.9726e-04
Loss = 4.2950e-02, PNorm = 153.7414, GNorm = 0.6391, lr_0 = 5.9685e-04
Loss = 3.5298e-02, PNorm = 153.7800, GNorm = 0.3896, lr_0 = 5.9644e-04
Loss = 3.3751e-02, PNorm = 153.8239, GNorm = 0.2967, lr_0 = 5.9603e-04
Loss = 2.8396e-02, PNorm = 153.8556, GNorm = 0.4385, lr_0 = 5.9562e-04
Loss = 3.4355e-02, PNorm = 153.8906, GNorm = 0.2534, lr_0 = 5.9521e-04
Loss = 3.5224e-02, PNorm = 153.9324, GNorm = 0.5458, lr_0 = 5.9481e-04
Loss = 4.2161e-02, PNorm = 153.9739, GNorm = 0.7203, lr_0 = 5.9440e-04
Loss = 4.0727e-02, PNorm = 154.0163, GNorm = 0.4370, lr_0 = 5.9399e-04
Loss = 4.2513e-02, PNorm = 154.0645, GNorm = 0.5836, lr_0 = 5.9358e-04
Loss = 3.3958e-02, PNorm = 154.1033, GNorm = 0.5487, lr_0 = 5.9318e-04
Loss = 3.8774e-02, PNorm = 154.1437, GNorm = 0.2739, lr_0 = 5.9277e-04
Loss = 3.7166e-02, PNorm = 154.1825, GNorm = 0.5025, lr_0 = 5.9236e-04
Loss = 4.2489e-02, PNorm = 154.2293, GNorm = 0.2869, lr_0 = 5.9196e-04
Loss = 3.4255e-02, PNorm = 154.2734, GNorm = 0.5113, lr_0 = 5.9155e-04
Loss = 3.5806e-02, PNorm = 154.3221, GNorm = 0.3460, lr_0 = 5.9115e-04
Loss = 3.4840e-02, PNorm = 154.3647, GNorm = 0.7168, lr_0 = 5.9074e-04
Loss = 3.8403e-02, PNorm = 154.4052, GNorm = 0.3822, lr_0 = 5.9034e-04
Loss = 3.7169e-02, PNorm = 154.4486, GNorm = 0.5877, lr_0 = 5.8993e-04
Loss = 4.7354e-02, PNorm = 154.4902, GNorm = 0.4589, lr_0 = 5.8953e-04
Loss = 3.7610e-02, PNorm = 154.5346, GNorm = 0.9445, lr_0 = 5.8913e-04
Loss = 4.1376e-02, PNorm = 154.5812, GNorm = 0.7406, lr_0 = 5.8872e-04
Loss = 4.1163e-02, PNorm = 154.6301, GNorm = 0.5023, lr_0 = 5.8832e-04
Loss = 4.1346e-02, PNorm = 154.6814, GNorm = 0.2626, lr_0 = 5.8792e-04
Loss = 4.1052e-02, PNorm = 154.7297, GNorm = 0.3209, lr_0 = 5.8751e-04
Loss = 3.2691e-02, PNorm = 154.7767, GNorm = 0.7822, lr_0 = 5.8711e-04
Loss = 3.5468e-02, PNorm = 154.8217, GNorm = 0.5128, lr_0 = 5.8671e-04
Loss = 3.6384e-02, PNorm = 154.8642, GNorm = 0.2750, lr_0 = 5.8631e-04
Loss = 3.5331e-02, PNorm = 154.9092, GNorm = 0.4357, lr_0 = 5.8591e-04
Loss = 3.4936e-02, PNorm = 154.9505, GNorm = 0.2560, lr_0 = 5.8550e-04
Loss = 3.7788e-02, PNorm = 154.9951, GNorm = 0.3562, lr_0 = 5.8510e-04
Loss = 4.9912e-02, PNorm = 155.0386, GNorm = 1.1902, lr_0 = 5.8470e-04
Loss = 3.2850e-02, PNorm = 155.0904, GNorm = 0.3669, lr_0 = 5.8430e-04
Loss = 4.2803e-02, PNorm = 155.1399, GNorm = 0.6236, lr_0 = 5.8390e-04
Loss = 3.7780e-02, PNorm = 155.1874, GNorm = 0.7067, lr_0 = 5.8350e-04
Loss = 3.5278e-02, PNorm = 155.2337, GNorm = 0.6429, lr_0 = 5.8310e-04
Loss = 3.9272e-02, PNorm = 155.2796, GNorm = 0.4729, lr_0 = 5.8270e-04
Loss = 4.2073e-02, PNorm = 155.3269, GNorm = 0.4490, lr_0 = 5.8230e-04
Loss = 4.0460e-02, PNorm = 155.3744, GNorm = 0.6724, lr_0 = 5.8190e-04
Loss = 3.9954e-02, PNorm = 155.4168, GNorm = 0.3332, lr_0 = 5.8151e-04
Loss = 4.5613e-02, PNorm = 155.4596, GNorm = 0.3013, lr_0 = 5.8111e-04
Loss = 3.7930e-02, PNorm = 155.5080, GNorm = 0.4223, lr_0 = 5.8071e-04
Loss = 4.2447e-02, PNorm = 155.5559, GNorm = 0.3092, lr_0 = 5.8031e-04
Loss = 3.8444e-02, PNorm = 155.6062, GNorm = 0.9021, lr_0 = 5.7991e-04
Loss = 3.7137e-02, PNorm = 155.6555, GNorm = 0.4010, lr_0 = 5.7952e-04
Loss = 4.0711e-02, PNorm = 155.7037, GNorm = 0.4551, lr_0 = 5.7912e-04
Loss = 4.4817e-02, PNorm = 155.7530, GNorm = 0.8977, lr_0 = 5.7872e-04
Loss = 4.1424e-02, PNorm = 155.7967, GNorm = 0.4265, lr_0 = 5.7833e-04
Loss = 4.0860e-02, PNorm = 155.8416, GNorm = 0.7251, lr_0 = 5.7793e-04
Loss = 3.8567e-02, PNorm = 155.8847, GNorm = 0.5722, lr_0 = 5.7753e-04
Loss = 4.3464e-02, PNorm = 155.9326, GNorm = 0.4827, lr_0 = 5.7714e-04
Loss = 4.0677e-02, PNorm = 155.9749, GNorm = 0.6382, lr_0 = 5.7674e-04
Loss = 3.8513e-02, PNorm = 156.0215, GNorm = 0.3097, lr_0 = 5.7635e-04
Loss = 4.2450e-02, PNorm = 156.0709, GNorm = 0.3934, lr_0 = 5.7595e-04
Loss = 4.2131e-02, PNorm = 156.1220, GNorm = 0.4555, lr_0 = 5.7556e-04
Loss = 3.8444e-02, PNorm = 156.1694, GNorm = 0.6092, lr_0 = 5.7516e-04
Loss = 3.4464e-02, PNorm = 156.2174, GNorm = 0.3377, lr_0 = 5.7477e-04
Loss = 4.4852e-02, PNorm = 156.2695, GNorm = 0.8374, lr_0 = 5.7438e-04
Loss = 4.4807e-02, PNorm = 156.3241, GNorm = 0.3948, lr_0 = 5.7398e-04
Loss = 5.1265e-02, PNorm = 156.3790, GNorm = 0.4931, lr_0 = 5.7359e-04
Loss = 3.7209e-02, PNorm = 156.4296, GNorm = 0.3197, lr_0 = 5.7320e-04
Loss = 3.7074e-02, PNorm = 156.4758, GNorm = 0.4602, lr_0 = 5.7280e-04
Loss = 3.8390e-02, PNorm = 156.5209, GNorm = 0.3673, lr_0 = 5.7241e-04
Loss = 3.7581e-02, PNorm = 156.5715, GNorm = 0.3917, lr_0 = 5.7202e-04
Loss = 4.7276e-02, PNorm = 156.6275, GNorm = 0.6353, lr_0 = 5.7163e-04
Loss = 3.8177e-02, PNorm = 156.6777, GNorm = 0.3269, lr_0 = 5.7124e-04
Loss = 3.5619e-02, PNorm = 156.7256, GNorm = 0.4753, lr_0 = 5.7084e-04
Loss = 3.6092e-02, PNorm = 156.7769, GNorm = 0.3882, lr_0 = 5.7045e-04
Loss = 3.6164e-02, PNorm = 156.8247, GNorm = 0.3800, lr_0 = 5.7006e-04
Loss = 3.3514e-02, PNorm = 156.8710, GNorm = 0.5074, lr_0 = 5.6967e-04
Loss = 5.0773e-02, PNorm = 156.9160, GNorm = 0.7347, lr_0 = 5.6928e-04
Loss = 4.4560e-02, PNorm = 156.9579, GNorm = 0.3084, lr_0 = 5.6889e-04
Loss = 4.7235e-02, PNorm = 157.0027, GNorm = 0.5556, lr_0 = 5.6850e-04
Loss = 4.1134e-02, PNorm = 157.0560, GNorm = 0.5538, lr_0 = 5.6811e-04
Loss = 4.3798e-02, PNorm = 157.1107, GNorm = 0.5813, lr_0 = 5.6772e-04
Loss = 3.8834e-02, PNorm = 157.1626, GNorm = 0.8660, lr_0 = 5.6733e-04
Loss = 4.2363e-02, PNorm = 157.2154, GNorm = 0.4570, lr_0 = 5.6695e-04
Loss = 4.2765e-02, PNorm = 157.2665, GNorm = 0.4964, lr_0 = 5.6656e-04
Loss = 3.5295e-02, PNorm = 157.3179, GNorm = 0.4071, lr_0 = 5.6617e-04
Loss = 4.0006e-02, PNorm = 157.3664, GNorm = 0.3688, lr_0 = 5.6578e-04
Loss = 4.2291e-02, PNorm = 157.4069, GNorm = 0.9233, lr_0 = 5.6539e-04
Loss = 3.2872e-02, PNorm = 157.4538, GNorm = 0.2720, lr_0 = 5.6501e-04
Loss = 4.9161e-02, PNorm = 157.5007, GNorm = 0.6990, lr_0 = 5.6462e-04
Loss = 3.9596e-02, PNorm = 157.5449, GNorm = 0.7633, lr_0 = 5.6423e-04
Loss = 4.5011e-02, PNorm = 157.5931, GNorm = 0.4440, lr_0 = 5.6385e-04
Loss = 4.2076e-02, PNorm = 157.6450, GNorm = 0.5386, lr_0 = 5.6346e-04
Loss = 4.1494e-02, PNorm = 157.6978, GNorm = 0.7458, lr_0 = 5.6307e-04
Loss = 4.1511e-02, PNorm = 157.7464, GNorm = 0.5666, lr_0 = 5.6269e-04
Loss = 3.9695e-02, PNorm = 157.7947, GNorm = 0.5145, lr_0 = 5.6230e-04
Validation mae = 0.123250
Epoch 9
Loss = 4.1527e-02, PNorm = 157.8285, GNorm = 1.0072, lr_0 = 5.6192e-04
Loss = 3.6894e-02, PNorm = 157.8645, GNorm = 0.7067, lr_0 = 5.6153e-04
Loss = 3.1020e-02, PNorm = 157.8988, GNorm = 0.2517, lr_0 = 5.6115e-04
Loss = 3.0473e-02, PNorm = 157.9362, GNorm = 0.7084, lr_0 = 5.6076e-04
Loss = 3.1917e-02, PNorm = 157.9771, GNorm = 0.3430, lr_0 = 5.6038e-04
Loss = 3.2485e-02, PNorm = 158.0083, GNorm = 0.4695, lr_0 = 5.6000e-04
Loss = 3.3105e-02, PNorm = 158.0419, GNorm = 0.4497, lr_0 = 5.5961e-04
Loss = 3.4836e-02, PNorm = 158.0734, GNorm = 0.6839, lr_0 = 5.5923e-04
Loss = 2.9116e-02, PNorm = 158.1063, GNorm = 0.6206, lr_0 = 5.5885e-04
Loss = 3.8848e-02, PNorm = 158.1380, GNorm = 0.5470, lr_0 = 5.5846e-04
Loss = 3.2691e-02, PNorm = 158.1716, GNorm = 0.3380, lr_0 = 5.5808e-04
Loss = 3.6462e-02, PNorm = 158.2092, GNorm = 0.3399, lr_0 = 5.5770e-04
Loss = 3.0323e-02, PNorm = 158.2438, GNorm = 0.4317, lr_0 = 5.5732e-04
Loss = 3.2927e-02, PNorm = 158.2810, GNorm = 0.4234, lr_0 = 5.5693e-04
Loss = 2.9823e-02, PNorm = 158.3123, GNorm = 0.2169, lr_0 = 5.5655e-04
Loss = 3.0456e-02, PNorm = 158.3443, GNorm = 0.3505, lr_0 = 5.5617e-04
Loss = 3.1328e-02, PNorm = 158.3750, GNorm = 0.2654, lr_0 = 5.5579e-04
Loss = 3.8410e-02, PNorm = 158.4066, GNorm = 0.3393, lr_0 = 5.5541e-04
Loss = 2.5456e-02, PNorm = 158.4409, GNorm = 0.3574, lr_0 = 5.5503e-04
Loss = 3.9751e-02, PNorm = 158.4753, GNorm = 0.3210, lr_0 = 5.5465e-04
Loss = 3.3565e-02, PNorm = 158.5138, GNorm = 0.5565, lr_0 = 5.5427e-04
Loss = 2.9815e-02, PNorm = 158.5528, GNorm = 0.2920, lr_0 = 5.5389e-04
Loss = 3.4269e-02, PNorm = 158.5911, GNorm = 0.3060, lr_0 = 5.5351e-04
Loss = 2.8626e-02, PNorm = 158.6299, GNorm = 0.4727, lr_0 = 5.5313e-04
Loss = 2.4821e-02, PNorm = 158.6646, GNorm = 0.3351, lr_0 = 5.5275e-04
Loss = 2.7140e-02, PNorm = 158.6983, GNorm = 0.3614, lr_0 = 5.5237e-04
Loss = 3.5870e-02, PNorm = 158.7302, GNorm = 0.2611, lr_0 = 5.5199e-04
Loss = 2.8012e-02, PNorm = 158.7696, GNorm = 0.4491, lr_0 = 5.5162e-04
Loss = 2.7586e-02, PNorm = 158.8081, GNorm = 0.3073, lr_0 = 5.5124e-04
Loss = 3.9340e-02, PNorm = 158.8488, GNorm = 0.9489, lr_0 = 5.5086e-04
Loss = 3.4618e-02, PNorm = 158.8932, GNorm = 0.2672, lr_0 = 5.5048e-04
Loss = 2.9118e-02, PNorm = 158.9319, GNorm = 0.2704, lr_0 = 5.5011e-04
Loss = 2.5343e-02, PNorm = 158.9657, GNorm = 0.4866, lr_0 = 5.4973e-04
Loss = 2.7177e-02, PNorm = 158.9998, GNorm = 0.2423, lr_0 = 5.4935e-04
Loss = 3.2586e-02, PNorm = 159.0323, GNorm = 0.3119, lr_0 = 5.4898e-04
Loss = 3.0238e-02, PNorm = 159.0615, GNorm = 0.3475, lr_0 = 5.4860e-04
Loss = 3.1114e-02, PNorm = 159.0981, GNorm = 0.5558, lr_0 = 5.4822e-04
Loss = 3.0193e-02, PNorm = 159.1308, GNorm = 0.7265, lr_0 = 5.4785e-04
Loss = 2.8483e-02, PNorm = 159.1682, GNorm = 0.5051, lr_0 = 5.4747e-04
Loss = 3.4449e-02, PNorm = 159.2000, GNorm = 0.4531, lr_0 = 5.4710e-04
Loss = 2.8982e-02, PNorm = 159.2354, GNorm = 0.4511, lr_0 = 5.4672e-04
Loss = 3.2615e-02, PNorm = 159.2721, GNorm = 0.4640, lr_0 = 5.4635e-04
Loss = 2.8382e-02, PNorm = 159.3046, GNorm = 0.5536, lr_0 = 5.4597e-04
Loss = 4.6171e-02, PNorm = 159.3439, GNorm = 0.8244, lr_0 = 5.4560e-04
Loss = 3.0536e-02, PNorm = 159.3823, GNorm = 0.6100, lr_0 = 5.4523e-04
Loss = 3.1693e-02, PNorm = 159.4206, GNorm = 0.3256, lr_0 = 5.4485e-04
Loss = 2.7209e-02, PNorm = 159.4594, GNorm = 0.2488, lr_0 = 5.4448e-04
Loss = 3.0117e-02, PNorm = 159.4929, GNorm = 0.7866, lr_0 = 5.4411e-04
Loss = 3.3573e-02, PNorm = 159.5282, GNorm = 0.3040, lr_0 = 5.4373e-04
Loss = 3.1660e-02, PNorm = 159.5644, GNorm = 0.4523, lr_0 = 5.4336e-04
Loss = 3.3432e-02, PNorm = 159.6006, GNorm = 0.6892, lr_0 = 5.4299e-04
Loss = 3.4674e-02, PNorm = 159.6355, GNorm = 0.5630, lr_0 = 5.4262e-04
Loss = 3.0105e-02, PNorm = 159.6745, GNorm = 0.5574, lr_0 = 5.4225e-04
Loss = 3.2793e-02, PNorm = 159.7145, GNorm = 0.4195, lr_0 = 5.4187e-04
Loss = 3.1018e-02, PNorm = 159.7526, GNorm = 0.3422, lr_0 = 5.4150e-04
Loss = 3.6486e-02, PNorm = 159.7922, GNorm = 0.3284, lr_0 = 5.4113e-04
Loss = 3.7090e-02, PNorm = 159.8336, GNorm = 0.5692, lr_0 = 5.4076e-04
Loss = 3.7954e-02, PNorm = 159.8727, GNorm = 0.2752, lr_0 = 5.4039e-04
Loss = 3.7598e-02, PNorm = 159.9120, GNorm = 0.2989, lr_0 = 5.4002e-04
Loss = 3.5566e-02, PNorm = 159.9549, GNorm = 0.3745, lr_0 = 5.3965e-04
Loss = 3.2544e-02, PNorm = 159.9925, GNorm = 0.4660, lr_0 = 5.3928e-04
Loss = 3.2892e-02, PNorm = 160.0376, GNorm = 0.3270, lr_0 = 5.3891e-04
Loss = 2.9610e-02, PNorm = 160.0863, GNorm = 0.6428, lr_0 = 5.3854e-04
Loss = 2.6546e-02, PNorm = 160.1260, GNorm = 0.5262, lr_0 = 5.3817e-04
Loss = 4.5771e-02, PNorm = 160.1623, GNorm = 0.2353, lr_0 = 5.3781e-04
Loss = 2.5512e-02, PNorm = 160.2005, GNorm = 0.3281, lr_0 = 5.3744e-04
Loss = 4.0584e-02, PNorm = 160.2334, GNorm = 0.5810, lr_0 = 5.3707e-04
Loss = 3.5959e-02, PNorm = 160.2723, GNorm = 0.5501, lr_0 = 5.3670e-04
Loss = 3.3512e-02, PNorm = 160.3108, GNorm = 0.3219, lr_0 = 5.3633e-04
Loss = 3.1168e-02, PNorm = 160.3464, GNorm = 0.3756, lr_0 = 5.3597e-04
Loss = 2.8636e-02, PNorm = 160.3850, GNorm = 0.4580, lr_0 = 5.3560e-04
Loss = 3.2169e-02, PNorm = 160.4265, GNorm = 0.4207, lr_0 = 5.3523e-04
Loss = 3.3250e-02, PNorm = 160.4757, GNorm = 0.5789, lr_0 = 5.3486e-04
Loss = 3.1291e-02, PNorm = 160.5171, GNorm = 0.4401, lr_0 = 5.3450e-04
Loss = 3.5297e-02, PNorm = 160.5486, GNorm = 0.2458, lr_0 = 5.3413e-04
Loss = 4.0190e-02, PNorm = 160.5909, GNorm = 0.5858, lr_0 = 5.3377e-04
Loss = 3.0110e-02, PNorm = 160.6290, GNorm = 0.4333, lr_0 = 5.3340e-04
Loss = 3.1438e-02, PNorm = 160.6623, GNorm = 0.2648, lr_0 = 5.3304e-04
Loss = 2.6058e-02, PNorm = 160.6986, GNorm = 0.2152, lr_0 = 5.3267e-04
Loss = 3.7992e-02, PNorm = 160.7379, GNorm = 0.6314, lr_0 = 5.3231e-04
Loss = 2.9708e-02, PNorm = 160.7764, GNorm = 0.5825, lr_0 = 5.3194e-04
Loss = 3.4079e-02, PNorm = 160.8139, GNorm = 0.4899, lr_0 = 5.3158e-04
Loss = 3.6731e-02, PNorm = 160.8583, GNorm = 0.2695, lr_0 = 5.3121e-04
Loss = 4.4087e-02, PNorm = 160.9000, GNorm = 0.2900, lr_0 = 5.3085e-04
Loss = 3.1380e-02, PNorm = 160.9407, GNorm = 0.2434, lr_0 = 5.3048e-04
Loss = 3.4357e-02, PNorm = 160.9807, GNorm = 0.5838, lr_0 = 5.3012e-04
Loss = 3.5883e-02, PNorm = 161.0265, GNorm = 0.3805, lr_0 = 5.2976e-04
Loss = 3.2616e-02, PNorm = 161.0702, GNorm = 0.3679, lr_0 = 5.2939e-04
Loss = 2.8553e-02, PNorm = 161.1151, GNorm = 0.6073, lr_0 = 5.2903e-04
Loss = 3.1944e-02, PNorm = 161.1640, GNorm = 0.4918, lr_0 = 5.2867e-04
Loss = 4.6241e-02, PNorm = 161.2086, GNorm = 0.4343, lr_0 = 5.2831e-04
Loss = 3.9016e-02, PNorm = 161.2544, GNorm = 0.3437, lr_0 = 5.2795e-04
Loss = 3.1792e-02, PNorm = 161.2985, GNorm = 1.2643, lr_0 = 5.2758e-04
Loss = 3.5047e-02, PNorm = 161.3417, GNorm = 0.4369, lr_0 = 5.2722e-04
Loss = 3.1293e-02, PNorm = 161.3809, GNorm = 0.4716, lr_0 = 5.2686e-04
Loss = 3.1911e-02, PNorm = 161.4194, GNorm = 0.6730, lr_0 = 5.2650e-04
Loss = 2.7091e-02, PNorm = 161.4571, GNorm = 0.4290, lr_0 = 5.2614e-04
Loss = 3.4611e-02, PNorm = 161.4982, GNorm = 0.3438, lr_0 = 5.2578e-04
Loss = 3.6402e-02, PNorm = 161.5459, GNorm = 0.5060, lr_0 = 5.2542e-04
Loss = 3.7544e-02, PNorm = 161.5965, GNorm = 0.5131, lr_0 = 5.2506e-04
Loss = 4.5580e-02, PNorm = 161.6389, GNorm = 1.9145, lr_0 = 5.2470e-04
Loss = 3.1989e-02, PNorm = 161.6904, GNorm = 0.2738, lr_0 = 5.2434e-04
Loss = 4.4096e-02, PNorm = 161.7350, GNorm = 0.3272, lr_0 = 5.2398e-04
Loss = 3.3757e-02, PNorm = 161.7794, GNorm = 0.3520, lr_0 = 5.2362e-04
Loss = 3.1259e-02, PNorm = 161.8250, GNorm = 0.4722, lr_0 = 5.2326e-04
Loss = 3.4933e-02, PNorm = 161.8712, GNorm = 0.2521, lr_0 = 5.2290e-04
Loss = 3.5461e-02, PNorm = 161.9167, GNorm = 0.3736, lr_0 = 5.2255e-04
Loss = 2.5505e-02, PNorm = 161.9619, GNorm = 0.3017, lr_0 = 5.2219e-04
Loss = 3.0125e-02, PNorm = 162.0047, GNorm = 0.3443, lr_0 = 5.2183e-04
Loss = 2.8651e-02, PNorm = 162.0484, GNorm = 0.3849, lr_0 = 5.2147e-04
Loss = 3.8465e-02, PNorm = 162.0922, GNorm = 0.2462, lr_0 = 5.2112e-04
Loss = 4.2026e-02, PNorm = 162.1390, GNorm = 0.3321, lr_0 = 5.2076e-04
Loss = 4.9634e-02, PNorm = 162.1856, GNorm = 0.3783, lr_0 = 5.2040e-04
Loss = 3.5159e-02, PNorm = 162.2318, GNorm = 0.3639, lr_0 = 5.2005e-04
Loss = 2.8944e-02, PNorm = 162.2714, GNorm = 0.2162, lr_0 = 5.1969e-04
Loss = 2.8922e-02, PNorm = 162.3154, GNorm = 0.3719, lr_0 = 5.1933e-04
Loss = 2.8065e-02, PNorm = 162.3561, GNorm = 0.3141, lr_0 = 5.1898e-04
Loss = 3.7603e-02, PNorm = 162.3986, GNorm = 0.6184, lr_0 = 5.1862e-04
Loss = 3.7596e-02, PNorm = 162.4427, GNorm = 0.3328, lr_0 = 5.1827e-04
Loss = 3.4165e-02, PNorm = 162.4873, GNorm = 0.2812, lr_0 = 5.1791e-04
Validation mae = 0.122593
Epoch 10
Loss = 2.6500e-02, PNorm = 162.5247, GNorm = 0.4221, lr_0 = 5.1756e-04
Loss = 2.4145e-02, PNorm = 162.5549, GNorm = 0.4650, lr_0 = 5.1720e-04
Loss = 2.8908e-02, PNorm = 162.5838, GNorm = 0.4615, lr_0 = 5.1685e-04
Loss = 3.5286e-02, PNorm = 162.6114, GNorm = 0.4605, lr_0 = 5.1649e-04
Loss = 2.5171e-02, PNorm = 162.6409, GNorm = 0.4104, lr_0 = 5.1614e-04
Loss = 2.8265e-02, PNorm = 162.6670, GNorm = 0.2021, lr_0 = 5.1579e-04
Loss = 3.7465e-02, PNorm = 162.6972, GNorm = 0.3480, lr_0 = 5.1543e-04
Loss = 3.0558e-02, PNorm = 162.7254, GNorm = 0.3762, lr_0 = 5.1508e-04
Loss = 3.1340e-02, PNorm = 162.7501, GNorm = 0.4666, lr_0 = 5.1473e-04
Loss = 3.3063e-02, PNorm = 162.7786, GNorm = 0.5474, lr_0 = 5.1437e-04
Loss = 2.5913e-02, PNorm = 162.8090, GNorm = 0.2499, lr_0 = 5.1402e-04
Loss = 2.1938e-02, PNorm = 162.8385, GNorm = 0.5145, lr_0 = 5.1367e-04
Loss = 3.2823e-02, PNorm = 162.8631, GNorm = 0.3080, lr_0 = 5.1332e-04
Loss = 2.4964e-02, PNorm = 162.8927, GNorm = 0.4933, lr_0 = 5.1297e-04
Loss = 2.2463e-02, PNorm = 162.9243, GNorm = 0.2815, lr_0 = 5.1262e-04
Loss = 2.9400e-02, PNorm = 162.9534, GNorm = 0.5146, lr_0 = 5.1226e-04
Loss = 2.3405e-02, PNorm = 162.9805, GNorm = 0.2157, lr_0 = 5.1191e-04
Loss = 3.1419e-02, PNorm = 163.0085, GNorm = 0.6600, lr_0 = 5.1156e-04
Loss = 3.2484e-02, PNorm = 163.0397, GNorm = 0.5392, lr_0 = 5.1121e-04
Loss = 3.2937e-02, PNorm = 163.0736, GNorm = 0.3087, lr_0 = 5.1086e-04
Loss = 2.6811e-02, PNorm = 163.1065, GNorm = 0.3650, lr_0 = 5.1051e-04
Loss = 2.4559e-02, PNorm = 163.1365, GNorm = 0.2241, lr_0 = 5.1016e-04
Loss = 2.8394e-02, PNorm = 163.1652, GNorm = 0.2857, lr_0 = 5.0981e-04
Loss = 2.8236e-02, PNorm = 163.1968, GNorm = 0.2112, lr_0 = 5.0946e-04
Loss = 2.9986e-02, PNorm = 163.2263, GNorm = 0.3159, lr_0 = 5.0911e-04
Loss = 2.3743e-02, PNorm = 163.2590, GNorm = 0.3453, lr_0 = 5.0877e-04
Loss = 2.5470e-02, PNorm = 163.2914, GNorm = 0.6040, lr_0 = 5.0842e-04
Loss = 2.1781e-02, PNorm = 163.3185, GNorm = 0.1941, lr_0 = 5.0807e-04
Loss = 2.5407e-02, PNorm = 163.3473, GNorm = 0.1981, lr_0 = 5.0772e-04
Loss = 2.7213e-02, PNorm = 163.3757, GNorm = 0.8017, lr_0 = 5.0737e-04
Loss = 2.6188e-02, PNorm = 163.4033, GNorm = 0.3268, lr_0 = 5.0703e-04
Loss = 2.6556e-02, PNorm = 163.4356, GNorm = 0.2106, lr_0 = 5.0668e-04
Loss = 2.2389e-02, PNorm = 163.4669, GNorm = 0.5058, lr_0 = 5.0633e-04
Loss = 3.0321e-02, PNorm = 163.4967, GNorm = 0.5316, lr_0 = 5.0598e-04
Loss = 2.0440e-02, PNorm = 163.5294, GNorm = 0.2043, lr_0 = 5.0564e-04
Loss = 2.4982e-02, PNorm = 163.5578, GNorm = 0.2834, lr_0 = 5.0529e-04
Loss = 2.6342e-02, PNorm = 163.5881, GNorm = 0.3885, lr_0 = 5.0494e-04
Loss = 2.8630e-02, PNorm = 163.6224, GNorm = 0.3577, lr_0 = 5.0460e-04
Loss = 2.1395e-02, PNorm = 163.6525, GNorm = 0.3895, lr_0 = 5.0425e-04
Loss = 4.3054e-02, PNorm = 163.6870, GNorm = 0.2887, lr_0 = 5.0391e-04
Loss = 2.4126e-02, PNorm = 163.7249, GNorm = 0.3249, lr_0 = 5.0356e-04
Loss = 2.0919e-02, PNorm = 163.7564, GNorm = 0.4913, lr_0 = 5.0322e-04
Loss = 2.8091e-02, PNorm = 163.7873, GNorm = 0.2324, lr_0 = 5.0287e-04
Loss = 2.6532e-02, PNorm = 163.8204, GNorm = 0.3248, lr_0 = 5.0253e-04
Loss = 2.6347e-02, PNorm = 163.8558, GNorm = 0.5128, lr_0 = 5.0218e-04
Loss = 2.2799e-02, PNorm = 163.8871, GNorm = 0.2638, lr_0 = 5.0184e-04
Loss = 3.4731e-02, PNorm = 163.9204, GNorm = 0.6144, lr_0 = 5.0150e-04
Loss = 3.0507e-02, PNorm = 163.9549, GNorm = 0.2570, lr_0 = 5.0115e-04
Loss = 2.6068e-02, PNorm = 163.9887, GNorm = 0.5866, lr_0 = 5.0081e-04
Loss = 3.1276e-02, PNorm = 164.0198, GNorm = 0.5682, lr_0 = 5.0047e-04
Loss = 3.5510e-02, PNorm = 164.0554, GNorm = 0.3880, lr_0 = 5.0012e-04
Loss = 2.3869e-02, PNorm = 164.0882, GNorm = 0.3271, lr_0 = 4.9978e-04
Loss = 2.9455e-02, PNorm = 164.1195, GNorm = 0.3239, lr_0 = 4.9944e-04
Loss = 3.8200e-02, PNorm = 164.1521, GNorm = 0.3857, lr_0 = 4.9910e-04
Loss = 2.2202e-02, PNorm = 164.1850, GNorm = 0.2523, lr_0 = 4.9875e-04
Loss = 2.7067e-02, PNorm = 164.2160, GNorm = 0.4155, lr_0 = 4.9841e-04
Loss = 2.9476e-02, PNorm = 164.2493, GNorm = 0.3376, lr_0 = 4.9807e-04
Loss = 2.4089e-02, PNorm = 164.2816, GNorm = 0.2247, lr_0 = 4.9773e-04
Loss = 2.3678e-02, PNorm = 164.3143, GNorm = 0.3526, lr_0 = 4.9739e-04
Loss = 2.8340e-02, PNorm = 164.3474, GNorm = 0.4609, lr_0 = 4.9705e-04
Loss = 2.5624e-02, PNorm = 164.3805, GNorm = 0.5356, lr_0 = 4.9671e-04
Loss = 2.6399e-02, PNorm = 164.4163, GNorm = 0.2614, lr_0 = 4.9637e-04
Loss = 3.5755e-02, PNorm = 164.4534, GNorm = 0.2026, lr_0 = 4.9603e-04
Loss = 2.3049e-02, PNorm = 164.4948, GNorm = 0.2239, lr_0 = 4.9569e-04
Loss = 2.5612e-02, PNorm = 164.5296, GNorm = 0.2140, lr_0 = 4.9535e-04
Loss = 3.0327e-02, PNorm = 164.5635, GNorm = 0.3227, lr_0 = 4.9501e-04
Loss = 2.8706e-02, PNorm = 164.5955, GNorm = 0.3120, lr_0 = 4.9467e-04
Loss = 3.2006e-02, PNorm = 164.6286, GNorm = 0.4175, lr_0 = 4.9433e-04
Loss = 2.3627e-02, PNorm = 164.6661, GNorm = 0.3994, lr_0 = 4.9399e-04
Loss = 3.4527e-02, PNorm = 164.6992, GNorm = 0.8879, lr_0 = 4.9365e-04
Loss = 2.3047e-02, PNorm = 164.7323, GNorm = 0.3826, lr_0 = 4.9332e-04
Loss = 2.4455e-02, PNorm = 164.7644, GNorm = 0.2357, lr_0 = 4.9298e-04
Loss = 2.9837e-02, PNorm = 164.7948, GNorm = 0.4904, lr_0 = 4.9264e-04
Loss = 2.6777e-02, PNorm = 164.8357, GNorm = 0.4699, lr_0 = 4.9230e-04
Loss = 2.3213e-02, PNorm = 164.8758, GNorm = 0.2320, lr_0 = 4.9197e-04
Loss = 2.4093e-02, PNorm = 164.9167, GNorm = 0.2761, lr_0 = 4.9163e-04
Loss = 2.6395e-02, PNorm = 164.9510, GNorm = 0.8531, lr_0 = 4.9129e-04
Loss = 2.1106e-02, PNorm = 164.9816, GNorm = 0.5332, lr_0 = 4.9095e-04
Loss = 2.8821e-02, PNorm = 165.0149, GNorm = 0.2550, lr_0 = 4.9062e-04
Loss = 2.8432e-02, PNorm = 165.0466, GNorm = 0.3247, lr_0 = 4.9028e-04
Loss = 2.7810e-02, PNorm = 165.0806, GNorm = 0.2873, lr_0 = 4.8995e-04
Loss = 3.0188e-02, PNorm = 165.1174, GNorm = 0.2499, lr_0 = 4.8961e-04
Loss = 2.8091e-02, PNorm = 165.1533, GNorm = 0.2758, lr_0 = 4.8928e-04
Loss = 3.4554e-02, PNorm = 165.1851, GNorm = 1.2183, lr_0 = 4.8894e-04
Loss = 4.1631e-02, PNorm = 165.2194, GNorm = 0.5915, lr_0 = 4.8861e-04
Loss = 2.5872e-02, PNorm = 165.2562, GNorm = 0.2487, lr_0 = 4.8827e-04
Loss = 2.4725e-02, PNorm = 165.2986, GNorm = 0.4054, lr_0 = 4.8794e-04
Loss = 2.5536e-02, PNorm = 165.3324, GNorm = 0.6363, lr_0 = 4.8760e-04
Loss = 2.9028e-02, PNorm = 165.3629, GNorm = 0.4820, lr_0 = 4.8727e-04
Loss = 3.6774e-02, PNorm = 165.4003, GNorm = 0.4252, lr_0 = 4.8693e-04
Loss = 2.6486e-02, PNorm = 165.4396, GNorm = 0.3544, lr_0 = 4.8660e-04
Loss = 2.7364e-02, PNorm = 165.4773, GNorm = 0.2057, lr_0 = 4.8627e-04
Loss = 2.3441e-02, PNorm = 165.5133, GNorm = 0.4078, lr_0 = 4.8593e-04
Loss = 4.0515e-02, PNorm = 165.5469, GNorm = 0.5550, lr_0 = 4.8560e-04
Loss = 2.8103e-02, PNorm = 165.5791, GNorm = 0.2496, lr_0 = 4.8527e-04
Loss = 2.9915e-02, PNorm = 165.6154, GNorm = 0.3216, lr_0 = 4.8494e-04
Loss = 3.0950e-02, PNorm = 165.6527, GNorm = 0.2752, lr_0 = 4.8460e-04
Loss = 2.1677e-02, PNorm = 165.6852, GNorm = 0.2436, lr_0 = 4.8427e-04
Loss = 2.7477e-02, PNorm = 165.7198, GNorm = 0.3228, lr_0 = 4.8394e-04
Loss = 2.3455e-02, PNorm = 165.7501, GNorm = 0.5260, lr_0 = 4.8361e-04
Loss = 3.6693e-02, PNorm = 165.7867, GNorm = 0.3180, lr_0 = 4.8328e-04
Loss = 2.8384e-02, PNorm = 165.8223, GNorm = 0.3784, lr_0 = 4.8295e-04
Loss = 2.6235e-02, PNorm = 165.8589, GNorm = 0.3929, lr_0 = 4.8262e-04
Loss = 4.2421e-02, PNorm = 165.8957, GNorm = 4.3059, lr_0 = 4.8228e-04
Loss = 3.2277e-02, PNorm = 165.9246, GNorm = 0.3564, lr_0 = 4.8195e-04
Loss = 3.3346e-02, PNorm = 165.9659, GNorm = 0.5548, lr_0 = 4.8162e-04
Loss = 2.9615e-02, PNorm = 166.0020, GNorm = 0.5826, lr_0 = 4.8129e-04
Loss = 2.6061e-02, PNorm = 166.0410, GNorm = 0.2282, lr_0 = 4.8096e-04
Loss = 2.5524e-02, PNorm = 166.0783, GNorm = 0.5325, lr_0 = 4.8064e-04
Loss = 2.7410e-02, PNorm = 166.1145, GNorm = 0.2731, lr_0 = 4.8031e-04
Loss = 2.2416e-02, PNorm = 166.1511, GNorm = 0.2427, lr_0 = 4.7998e-04
Loss = 2.8697e-02, PNorm = 166.1857, GNorm = 0.3031, lr_0 = 4.7965e-04
Loss = 2.3110e-02, PNorm = 166.2142, GNorm = 0.2815, lr_0 = 4.7932e-04
Loss = 2.9180e-02, PNorm = 166.2434, GNorm = 0.4403, lr_0 = 4.7899e-04
Loss = 2.9312e-02, PNorm = 166.2805, GNorm = 0.3161, lr_0 = 4.7866e-04
Loss = 2.4716e-02, PNorm = 166.3174, GNorm = 0.8359, lr_0 = 4.7833e-04
Loss = 2.6672e-02, PNorm = 166.3544, GNorm = 0.1759, lr_0 = 4.7801e-04
Loss = 3.2148e-02, PNorm = 166.3913, GNorm = 0.6544, lr_0 = 4.7768e-04
Loss = 3.2565e-02, PNorm = 166.4291, GNorm = 0.4834, lr_0 = 4.7735e-04
Loss = 3.2500e-02, PNorm = 166.4704, GNorm = 0.3485, lr_0 = 4.7703e-04
Validation mae = 0.122390
Epoch 11
Loss = 2.3794e-02, PNorm = 166.5019, GNorm = 0.4924, lr_0 = 4.7670e-04
Loss = 3.1769e-02, PNorm = 166.5300, GNorm = 0.3884, lr_0 = 4.7637e-04
Loss = 2.8414e-02, PNorm = 166.5574, GNorm = 0.5168, lr_0 = 4.7605e-04
Loss = 2.2717e-02, PNorm = 166.5867, GNorm = 0.2254, lr_0 = 4.7572e-04
Loss = 1.8943e-02, PNorm = 166.6079, GNorm = 0.2006, lr_0 = 4.7539e-04
Loss = 2.3524e-02, PNorm = 166.6305, GNorm = 0.2006, lr_0 = 4.7507e-04
Loss = 2.3617e-02, PNorm = 166.6490, GNorm = 0.4288, lr_0 = 4.7474e-04
Loss = 2.3332e-02, PNorm = 166.6713, GNorm = 0.3916, lr_0 = 4.7442e-04
Loss = 2.2890e-02, PNorm = 166.6946, GNorm = 0.5071, lr_0 = 4.7409e-04
Loss = 2.3904e-02, PNorm = 166.7194, GNorm = 0.3546, lr_0 = 4.7377e-04
Loss = 3.5713e-02, PNorm = 166.7470, GNorm = 0.3539, lr_0 = 4.7344e-04
Loss = 1.9310e-02, PNorm = 166.7751, GNorm = 0.6943, lr_0 = 4.7312e-04
Loss = 2.2674e-02, PNorm = 166.7979, GNorm = 0.4267, lr_0 = 4.7279e-04
Loss = 2.8721e-02, PNorm = 166.8233, GNorm = 0.4442, lr_0 = 4.7247e-04
Loss = 2.3389e-02, PNorm = 166.8477, GNorm = 0.4691, lr_0 = 4.7215e-04
Loss = 2.4951e-02, PNorm = 166.8730, GNorm = 0.3359, lr_0 = 4.7182e-04
Loss = 2.1730e-02, PNorm = 166.9000, GNorm = 0.3064, lr_0 = 4.7150e-04
Loss = 2.3006e-02, PNorm = 166.9246, GNorm = 0.2861, lr_0 = 4.7118e-04
Loss = 2.4599e-02, PNorm = 166.9492, GNorm = 0.1261, lr_0 = 4.7085e-04
Loss = 2.1568e-02, PNorm = 166.9718, GNorm = 0.2531, lr_0 = 4.7053e-04
Loss = 2.0454e-02, PNorm = 166.9958, GNorm = 0.2059, lr_0 = 4.7021e-04
Loss = 2.4406e-02, PNorm = 167.0256, GNorm = 0.3583, lr_0 = 4.6989e-04
Loss = 2.5969e-02, PNorm = 167.0460, GNorm = 0.4854, lr_0 = 4.6957e-04
Loss = 2.3730e-02, PNorm = 167.0698, GNorm = 0.1954, lr_0 = 4.6924e-04
Loss = 2.0752e-02, PNorm = 167.0940, GNorm = 0.2791, lr_0 = 4.6892e-04
Loss = 2.3302e-02, PNorm = 167.1189, GNorm = 0.4835, lr_0 = 4.6860e-04
Loss = 2.1684e-02, PNorm = 167.1421, GNorm = 0.2831, lr_0 = 4.6828e-04
Loss = 1.5635e-02, PNorm = 167.1639, GNorm = 0.2902, lr_0 = 4.6796e-04
Loss = 2.7227e-02, PNorm = 167.1865, GNorm = 0.5094, lr_0 = 4.6764e-04
Loss = 2.1406e-02, PNorm = 167.2122, GNorm = 0.3251, lr_0 = 4.6732e-04
Loss = 2.4586e-02, PNorm = 167.2356, GNorm = 0.1861, lr_0 = 4.6700e-04
Loss = 2.3061e-02, PNorm = 167.2589, GNorm = 0.3789, lr_0 = 4.6668e-04
Loss = 1.9408e-02, PNorm = 167.2832, GNorm = 0.2703, lr_0 = 4.6636e-04
Loss = 2.2031e-02, PNorm = 167.3094, GNorm = 0.6184, lr_0 = 4.6604e-04
Loss = 1.8469e-02, PNorm = 167.3327, GNorm = 0.3322, lr_0 = 4.6572e-04
Loss = 2.4165e-02, PNorm = 167.3551, GNorm = 0.2239, lr_0 = 4.6540e-04
Loss = 2.0271e-02, PNorm = 167.3784, GNorm = 0.4619, lr_0 = 4.6508e-04
Loss = 1.7379e-02, PNorm = 167.4062, GNorm = 0.3202, lr_0 = 4.6476e-04
Loss = 2.2113e-02, PNorm = 167.4278, GNorm = 0.4605, lr_0 = 4.6445e-04
Loss = 2.4465e-02, PNorm = 167.4528, GNorm = 0.6478, lr_0 = 4.6413e-04
Loss = 2.5507e-02, PNorm = 167.4740, GNorm = 0.2158, lr_0 = 4.6381e-04
Loss = 2.0232e-02, PNorm = 167.5018, GNorm = 0.2901, lr_0 = 4.6349e-04
Loss = 2.9004e-02, PNorm = 167.5250, GNorm = 0.2776, lr_0 = 4.6317e-04
Loss = 1.9607e-02, PNorm = 167.5493, GNorm = 0.1903, lr_0 = 4.6286e-04
Loss = 2.2716e-02, PNorm = 167.5758, GNorm = 0.3389, lr_0 = 4.6254e-04
Loss = 2.4586e-02, PNorm = 167.6025, GNorm = 0.2083, lr_0 = 4.6222e-04
Loss = 1.9127e-02, PNorm = 167.6300, GNorm = 0.3020, lr_0 = 4.6191e-04
Loss = 2.3065e-02, PNorm = 167.6562, GNorm = 0.5128, lr_0 = 4.6159e-04
Loss = 1.9321e-02, PNorm = 167.6833, GNorm = 0.8623, lr_0 = 4.6127e-04
Loss = 2.2252e-02, PNorm = 167.7112, GNorm = 0.2321, lr_0 = 4.6096e-04
Loss = 2.3872e-02, PNorm = 167.7371, GNorm = 0.3055, lr_0 = 4.6064e-04
Loss = 2.6806e-02, PNorm = 167.7674, GNorm = 0.2686, lr_0 = 4.6033e-04
Loss = 2.2118e-02, PNorm = 167.7891, GNorm = 0.2037, lr_0 = 4.6001e-04
Loss = 2.2463e-02, PNorm = 167.8134, GNorm = 0.2879, lr_0 = 4.5970e-04
Loss = 1.7464e-02, PNorm = 167.8380, GNorm = 0.1958, lr_0 = 4.5938e-04
Loss = 2.0604e-02, PNorm = 167.8599, GNorm = 0.2169, lr_0 = 4.5907e-04
Loss = 2.2966e-02, PNorm = 167.8840, GNorm = 0.4680, lr_0 = 4.5875e-04
Loss = 1.9644e-02, PNorm = 167.9092, GNorm = 0.1811, lr_0 = 4.5844e-04
Loss = 2.8872e-02, PNorm = 167.9352, GNorm = 0.3300, lr_0 = 4.5812e-04
Loss = 2.0757e-02, PNorm = 167.9634, GNorm = 0.1793, lr_0 = 4.5781e-04
Loss = 2.4455e-02, PNorm = 167.9913, GNorm = 0.4796, lr_0 = 4.5750e-04
Loss = 2.4513e-02, PNorm = 168.0191, GNorm = 0.2389, lr_0 = 4.5718e-04
Loss = 2.6840e-02, PNorm = 168.0485, GNorm = 0.5873, lr_0 = 4.5687e-04
Loss = 2.0923e-02, PNorm = 168.0745, GNorm = 0.2482, lr_0 = 4.5656e-04
Loss = 2.2857e-02, PNorm = 168.1046, GNorm = 0.3679, lr_0 = 4.5624e-04
Loss = 1.7307e-02, PNorm = 168.1343, GNorm = 0.5398, lr_0 = 4.5593e-04
Loss = 2.2963e-02, PNorm = 168.1629, GNorm = 0.4386, lr_0 = 4.5562e-04
Loss = 2.3025e-02, PNorm = 168.1939, GNorm = 0.2849, lr_0 = 4.5531e-04
Loss = 2.2662e-02, PNorm = 168.2243, GNorm = 0.2738, lr_0 = 4.5499e-04
Loss = 1.8554e-02, PNorm = 168.2483, GNorm = 0.4341, lr_0 = 4.5468e-04
Loss = 2.9564e-02, PNorm = 168.2727, GNorm = 0.6432, lr_0 = 4.5437e-04
Loss = 2.3404e-02, PNorm = 168.2984, GNorm = 0.3328, lr_0 = 4.5406e-04
Loss = 2.1398e-02, PNorm = 168.3280, GNorm = 0.3921, lr_0 = 4.5375e-04
Loss = 2.1835e-02, PNorm = 168.3600, GNorm = 0.1723, lr_0 = 4.5344e-04
Loss = 3.1768e-02, PNorm = 168.3884, GNorm = 0.3924, lr_0 = 4.5313e-04
Loss = 2.3212e-02, PNorm = 168.4209, GNorm = 0.5445, lr_0 = 4.5282e-04
Loss = 2.1844e-02, PNorm = 168.4528, GNorm = 0.4391, lr_0 = 4.5251e-04
Loss = 2.3750e-02, PNorm = 168.4802, GNorm = 0.2236, lr_0 = 4.5220e-04
Loss = 2.4363e-02, PNorm = 168.5099, GNorm = 0.3629, lr_0 = 4.5189e-04
Loss = 3.1008e-02, PNorm = 168.5359, GNorm = 0.3160, lr_0 = 4.5158e-04
Loss = 2.0356e-02, PNorm = 168.5680, GNorm = 0.2746, lr_0 = 4.5127e-04
Loss = 1.9236e-02, PNorm = 168.5949, GNorm = 0.3659, lr_0 = 4.5096e-04
Loss = 2.1109e-02, PNorm = 168.6243, GNorm = 0.4615, lr_0 = 4.5065e-04
Loss = 2.3285e-02, PNorm = 168.6518, GNorm = 0.3684, lr_0 = 4.5034e-04
Loss = 2.0653e-02, PNorm = 168.6810, GNorm = 0.4909, lr_0 = 4.5003e-04
Loss = 2.4031e-02, PNorm = 168.7113, GNorm = 0.3153, lr_0 = 4.4972e-04
Loss = 1.7987e-02, PNorm = 168.7391, GNorm = 0.3619, lr_0 = 4.4942e-04
Loss = 3.1767e-02, PNorm = 168.7711, GNorm = 0.3179, lr_0 = 4.4911e-04
Loss = 2.4930e-02, PNorm = 168.8018, GNorm = 0.2986, lr_0 = 4.4880e-04
Loss = 1.9050e-02, PNorm = 168.8318, GNorm = 0.3266, lr_0 = 4.4849e-04
Loss = 2.1570e-02, PNorm = 168.8605, GNorm = 0.3284, lr_0 = 4.4819e-04
Loss = 2.6090e-02, PNorm = 168.8891, GNorm = 0.2216, lr_0 = 4.4788e-04
Loss = 2.3335e-02, PNorm = 168.9191, GNorm = 0.2608, lr_0 = 4.4757e-04
Loss = 1.8438e-02, PNorm = 168.9478, GNorm = 0.3898, lr_0 = 4.4727e-04
Loss = 2.4006e-02, PNorm = 168.9769, GNorm = 0.4015, lr_0 = 4.4696e-04
Loss = 2.1986e-02, PNorm = 169.0060, GNorm = 0.2683, lr_0 = 4.4665e-04
Loss = 2.1987e-02, PNorm = 169.0348, GNorm = 0.2300, lr_0 = 4.4635e-04
Loss = 2.6107e-02, PNorm = 169.0640, GNorm = 0.2713, lr_0 = 4.4604e-04
Loss = 2.2798e-02, PNorm = 169.0936, GNorm = 0.4561, lr_0 = 4.4574e-04
Loss = 2.6362e-02, PNorm = 169.1202, GNorm = 0.3072, lr_0 = 4.4543e-04
Loss = 2.2293e-02, PNorm = 169.1397, GNorm = 0.5357, lr_0 = 4.4513e-04
Loss = 2.6271e-02, PNorm = 169.1624, GNorm = 0.5359, lr_0 = 4.4482e-04
Loss = 1.8695e-02, PNorm = 169.1904, GNorm = 0.3321, lr_0 = 4.4452e-04
Loss = 2.1422e-02, PNorm = 169.2172, GNorm = 0.2371, lr_0 = 4.4421e-04
Loss = 2.8915e-02, PNorm = 169.2429, GNorm = 0.3942, lr_0 = 4.4391e-04
Loss = 2.0029e-02, PNorm = 169.2735, GNorm = 0.5429, lr_0 = 4.4360e-04
Loss = 2.0218e-02, PNorm = 169.3020, GNorm = 0.2635, lr_0 = 4.4330e-04
Loss = 2.4047e-02, PNorm = 169.3328, GNorm = 0.2387, lr_0 = 4.4299e-04
Loss = 4.2726e-02, PNorm = 169.3619, GNorm = 0.3487, lr_0 = 4.4269e-04
Loss = 2.9673e-02, PNorm = 169.3897, GNorm = 0.4331, lr_0 = 4.4239e-04
Loss = 2.1182e-02, PNorm = 169.4195, GNorm = 0.4367, lr_0 = 4.4209e-04
Loss = 2.2415e-02, PNorm = 169.4507, GNorm = 0.3786, lr_0 = 4.4178e-04
Loss = 3.0203e-02, PNorm = 169.4786, GNorm = 0.4301, lr_0 = 4.4148e-04
Loss = 2.8895e-02, PNorm = 169.5066, GNorm = 0.2224, lr_0 = 4.4118e-04
Loss = 3.1854e-02, PNorm = 169.5384, GNorm = 0.4128, lr_0 = 4.4088e-04
Loss = 2.8965e-02, PNorm = 169.5787, GNorm = 0.9161, lr_0 = 4.4057e-04
Loss = 2.3874e-02, PNorm = 169.6123, GNorm = 0.3485, lr_0 = 4.4027e-04
Loss = 2.3114e-02, PNorm = 169.6432, GNorm = 0.3261, lr_0 = 4.3997e-04
Loss = 2.2528e-02, PNorm = 169.6725, GNorm = 0.3680, lr_0 = 4.3967e-04
Loss = 2.6842e-02, PNorm = 169.7051, GNorm = 0.5530, lr_0 = 4.3937e-04
Validation mae = 0.122188
Epoch 12
Loss = 2.3454e-02, PNorm = 169.7299, GNorm = 0.7028, lr_0 = 4.3907e-04
Loss = 2.3348e-02, PNorm = 169.7526, GNorm = 0.7401, lr_0 = 4.3877e-04
Loss = 1.5985e-02, PNorm = 169.7740, GNorm = 0.2106, lr_0 = 4.3846e-04
Loss = 2.4362e-02, PNorm = 169.7982, GNorm = 0.9553, lr_0 = 4.3816e-04
Loss = 2.0525e-02, PNorm = 169.8197, GNorm = 0.2400, lr_0 = 4.3786e-04
Loss = 2.1887e-02, PNorm = 169.8434, GNorm = 0.1926, lr_0 = 4.3756e-04
Loss = 2.1261e-02, PNorm = 169.8638, GNorm = 0.3000, lr_0 = 4.3726e-04
Loss = 1.6542e-02, PNorm = 169.8849, GNorm = 0.2583, lr_0 = 4.3696e-04
Loss = 2.1464e-02, PNorm = 169.9028, GNorm = 0.1826, lr_0 = 4.3667e-04
Loss = 1.6772e-02, PNorm = 169.9206, GNorm = 0.1976, lr_0 = 4.3637e-04
Loss = 1.7091e-02, PNorm = 169.9415, GNorm = 0.2545, lr_0 = 4.3607e-04
Loss = 1.9899e-02, PNorm = 169.9674, GNorm = 0.4124, lr_0 = 4.3577e-04
Loss = 1.6929e-02, PNorm = 169.9883, GNorm = 0.4139, lr_0 = 4.3547e-04
Loss = 2.3105e-02, PNorm = 170.0093, GNorm = 0.3749, lr_0 = 4.3517e-04
Loss = 1.7772e-02, PNorm = 170.0323, GNorm = 0.2415, lr_0 = 4.3487e-04
Loss = 1.9989e-02, PNorm = 170.0494, GNorm = 0.2590, lr_0 = 4.3458e-04
Loss = 1.5218e-02, PNorm = 170.0689, GNorm = 0.2227, lr_0 = 4.3428e-04
Loss = 1.8018e-02, PNorm = 170.0861, GNorm = 0.4464, lr_0 = 4.3398e-04
Loss = 2.3407e-02, PNorm = 170.1056, GNorm = 0.3603, lr_0 = 4.3368e-04
Loss = 1.9925e-02, PNorm = 170.1248, GNorm = 0.2531, lr_0 = 4.3339e-04
Loss = 2.1043e-02, PNorm = 170.1467, GNorm = 0.2980, lr_0 = 4.3309e-04
Loss = 1.4597e-02, PNorm = 170.1666, GNorm = 0.1416, lr_0 = 4.3279e-04
Loss = 2.1402e-02, PNorm = 170.1836, GNorm = 0.2297, lr_0 = 4.3250e-04
Loss = 1.6690e-02, PNorm = 170.2052, GNorm = 0.3346, lr_0 = 4.3220e-04
Loss = 1.7999e-02, PNorm = 170.2234, GNorm = 0.4275, lr_0 = 4.3190e-04
Loss = 2.1458e-02, PNorm = 170.2408, GNorm = 0.3003, lr_0 = 4.3161e-04
Loss = 1.6090e-02, PNorm = 170.2598, GNorm = 0.2031, lr_0 = 4.3131e-04
Loss = 1.5462e-02, PNorm = 170.2810, GNorm = 0.5815, lr_0 = 4.3102e-04
Loss = 2.0609e-02, PNorm = 170.3010, GNorm = 0.2251, lr_0 = 4.3072e-04
Loss = 1.6802e-02, PNorm = 170.3203, GNorm = 0.2644, lr_0 = 4.3043e-04
Loss = 1.7680e-02, PNorm = 170.3400, GNorm = 0.3348, lr_0 = 4.3013e-04
Loss = 1.9868e-02, PNorm = 170.3568, GNorm = 0.2367, lr_0 = 4.2984e-04
Loss = 1.7524e-02, PNorm = 170.3768, GNorm = 0.2583, lr_0 = 4.2954e-04
Loss = 1.6596e-02, PNorm = 170.3927, GNorm = 0.2104, lr_0 = 4.2925e-04
Loss = 2.1412e-02, PNorm = 170.4098, GNorm = 0.2281, lr_0 = 4.2895e-04
Loss = 2.1870e-02, PNorm = 170.4305, GNorm = 0.1987, lr_0 = 4.2866e-04
Loss = 1.4773e-02, PNorm = 170.4502, GNorm = 0.3439, lr_0 = 4.2837e-04
Loss = 1.9856e-02, PNorm = 170.4696, GNorm = 0.6008, lr_0 = 4.2807e-04
Loss = 1.8298e-02, PNorm = 170.4915, GNorm = 0.2281, lr_0 = 4.2778e-04
Loss = 1.8216e-02, PNorm = 170.5152, GNorm = 0.2104, lr_0 = 4.2749e-04
Loss = 1.7873e-02, PNorm = 170.5349, GNorm = 0.5096, lr_0 = 4.2719e-04
Loss = 1.7943e-02, PNorm = 170.5524, GNorm = 0.1440, lr_0 = 4.2690e-04
Loss = 1.9036e-02, PNorm = 170.5741, GNorm = 0.2700, lr_0 = 4.2661e-04
Loss = 2.2299e-02, PNorm = 170.5995, GNorm = 0.4205, lr_0 = 4.2632e-04
Loss = 1.6519e-02, PNorm = 170.6186, GNorm = 0.3594, lr_0 = 4.2602e-04
Loss = 2.2402e-02, PNorm = 170.6422, GNorm = 0.5715, lr_0 = 4.2573e-04
Loss = 2.0349e-02, PNorm = 170.6706, GNorm = 0.9103, lr_0 = 4.2544e-04
Loss = 1.6210e-02, PNorm = 170.6914, GNorm = 0.1630, lr_0 = 4.2515e-04
Loss = 1.7727e-02, PNorm = 170.7108, GNorm = 0.6097, lr_0 = 4.2486e-04
Loss = 1.7868e-02, PNorm = 170.7304, GNorm = 0.2706, lr_0 = 4.2457e-04
Loss = 2.1761e-02, PNorm = 170.7468, GNorm = 0.3005, lr_0 = 4.2428e-04
Loss = 1.7246e-02, PNorm = 170.7639, GNorm = 0.2424, lr_0 = 4.2399e-04
Loss = 2.6571e-02, PNorm = 170.7810, GNorm = 0.2613, lr_0 = 4.2370e-04
Loss = 1.9776e-02, PNorm = 170.8021, GNorm = 0.3580, lr_0 = 4.2340e-04
Loss = 1.7245e-02, PNorm = 170.8220, GNorm = 0.3650, lr_0 = 4.2311e-04
Loss = 1.9817e-02, PNorm = 170.8403, GNorm = 0.1709, lr_0 = 4.2283e-04
Loss = 1.7212e-02, PNorm = 170.8594, GNorm = 0.1983, lr_0 = 4.2254e-04
Loss = 1.9365e-02, PNorm = 170.8782, GNorm = 0.4260, lr_0 = 4.2225e-04
Loss = 2.3019e-02, PNorm = 170.9030, GNorm = 0.6641, lr_0 = 4.2196e-04
Loss = 1.6581e-02, PNorm = 170.9308, GNorm = 0.2792, lr_0 = 4.2167e-04
Loss = 1.6701e-02, PNorm = 170.9519, GNorm = 0.3399, lr_0 = 4.2138e-04
Loss = 1.7157e-02, PNorm = 170.9724, GNorm = 0.2502, lr_0 = 4.2109e-04
Loss = 2.3858e-02, PNorm = 170.9934, GNorm = 0.1953, lr_0 = 4.2080e-04
Loss = 2.1936e-02, PNorm = 171.0211, GNorm = 0.3384, lr_0 = 4.2051e-04
Loss = 2.0954e-02, PNorm = 171.0508, GNorm = 0.4170, lr_0 = 4.2023e-04
Loss = 1.5253e-02, PNorm = 171.0733, GNorm = 0.5048, lr_0 = 4.1994e-04
Loss = 2.3395e-02, PNorm = 171.0958, GNorm = 0.2599, lr_0 = 4.1965e-04
Loss = 1.5982e-02, PNorm = 171.1193, GNorm = 0.1916, lr_0 = 4.1936e-04
Loss = 1.3153e-02, PNorm = 171.1403, GNorm = 0.2275, lr_0 = 4.1907e-04
Loss = 1.8926e-02, PNorm = 171.1608, GNorm = 0.1807, lr_0 = 4.1879e-04
Loss = 2.2281e-02, PNorm = 171.1843, GNorm = 0.1909, lr_0 = 4.1850e-04
Loss = 1.7126e-02, PNorm = 171.2050, GNorm = 0.4868, lr_0 = 4.1821e-04
Loss = 2.1242e-02, PNorm = 171.2254, GNorm = 0.1822, lr_0 = 4.1793e-04
Loss = 1.8562e-02, PNorm = 171.2469, GNorm = 0.2428, lr_0 = 4.1764e-04
Loss = 1.8227e-02, PNorm = 171.2712, GNorm = 0.5788, lr_0 = 4.1736e-04
Loss = 1.9024e-02, PNorm = 171.2920, GNorm = 0.3704, lr_0 = 4.1707e-04
Loss = 2.8165e-02, PNorm = 171.3145, GNorm = 0.2776, lr_0 = 4.1678e-04
Loss = 1.3078e-02, PNorm = 171.3385, GNorm = 0.3101, lr_0 = 4.1650e-04
Loss = 2.4117e-02, PNorm = 171.3611, GNorm = 0.7198, lr_0 = 4.1621e-04
Loss = 2.8878e-02, PNorm = 171.3872, GNorm = 1.9141, lr_0 = 4.1593e-04
Loss = 2.0800e-02, PNorm = 171.4101, GNorm = 0.4585, lr_0 = 4.1564e-04
Loss = 2.9312e-02, PNorm = 171.4337, GNorm = 0.4345, lr_0 = 4.1536e-04
Loss = 1.7374e-02, PNorm = 171.4569, GNorm = 0.2465, lr_0 = 4.1507e-04
Loss = 2.0998e-02, PNorm = 171.4812, GNorm = 0.1668, lr_0 = 4.1479e-04
Loss = 1.6294e-02, PNorm = 171.5051, GNorm = 0.2398, lr_0 = 4.1450e-04
Loss = 2.1243e-02, PNorm = 171.5304, GNorm = 0.2888, lr_0 = 4.1422e-04
Loss = 1.8955e-02, PNorm = 171.5566, GNorm = 0.4290, lr_0 = 4.1394e-04
Loss = 1.7459e-02, PNorm = 171.5866, GNorm = 0.1996, lr_0 = 4.1365e-04
Loss = 2.4135e-02, PNorm = 171.6118, GNorm = 0.3641, lr_0 = 4.1337e-04
Loss = 1.8163e-02, PNorm = 171.6388, GNorm = 0.1993, lr_0 = 4.1309e-04
Loss = 1.9352e-02, PNorm = 171.6636, GNorm = 0.2598, lr_0 = 4.1280e-04
Loss = 1.4860e-02, PNorm = 171.6902, GNorm = 0.3973, lr_0 = 4.1252e-04
Loss = 2.0806e-02, PNorm = 171.7137, GNorm = 0.4380, lr_0 = 4.1224e-04
Loss = 1.7396e-02, PNorm = 171.7350, GNorm = 0.2709, lr_0 = 4.1196e-04
Loss = 1.5964e-02, PNorm = 171.7588, GNorm = 0.1831, lr_0 = 4.1167e-04
Loss = 2.1837e-02, PNorm = 171.7814, GNorm = 0.3006, lr_0 = 4.1139e-04
Loss = 2.5953e-02, PNorm = 171.8080, GNorm = 0.6748, lr_0 = 4.1111e-04
Loss = 1.9667e-02, PNorm = 171.8297, GNorm = 0.5220, lr_0 = 4.1083e-04
Loss = 2.0702e-02, PNorm = 171.8490, GNorm = 0.2536, lr_0 = 4.1055e-04
Loss = 1.6435e-02, PNorm = 171.8735, GNorm = 0.1928, lr_0 = 4.1027e-04
Loss = 1.7683e-02, PNorm = 171.8998, GNorm = 0.1964, lr_0 = 4.0998e-04
Loss = 1.7303e-02, PNorm = 171.9232, GNorm = 0.2662, lr_0 = 4.0970e-04
Loss = 1.6134e-02, PNorm = 171.9471, GNorm = 0.3271, lr_0 = 4.0942e-04
Loss = 1.5194e-02, PNorm = 171.9688, GNorm = 0.3116, lr_0 = 4.0914e-04
Loss = 1.8844e-02, PNorm = 171.9896, GNorm = 0.1909, lr_0 = 4.0886e-04
Loss = 1.8229e-02, PNorm = 172.0124, GNorm = 0.2236, lr_0 = 4.0858e-04
Loss = 2.0609e-02, PNorm = 172.0357, GNorm = 0.6315, lr_0 = 4.0830e-04
Loss = 1.6898e-02, PNorm = 172.0566, GNorm = 0.3103, lr_0 = 4.0802e-04
Loss = 1.7369e-02, PNorm = 172.0771, GNorm = 0.3451, lr_0 = 4.0774e-04
Loss = 2.5194e-02, PNorm = 172.1059, GNorm = 0.6122, lr_0 = 4.0746e-04
Loss = 2.1563e-02, PNorm = 172.1323, GNorm = 0.4301, lr_0 = 4.0718e-04
Loss = 1.8867e-02, PNorm = 172.1599, GNorm = 0.4674, lr_0 = 4.0691e-04
Loss = 1.5574e-02, PNorm = 172.1861, GNorm = 0.3902, lr_0 = 4.0663e-04
Loss = 2.5956e-02, PNorm = 172.2122, GNorm = 0.1516, lr_0 = 4.0635e-04
Loss = 1.4685e-02, PNorm = 172.2373, GNorm = 0.1850, lr_0 = 4.0607e-04
Loss = 1.6687e-02, PNorm = 172.2642, GNorm = 0.3004, lr_0 = 4.0579e-04
Loss = 1.7989e-02, PNorm = 172.2877, GNorm = 0.3865, lr_0 = 4.0551e-04
Loss = 2.0685e-02, PNorm = 172.3073, GNorm = 0.4668, lr_0 = 4.0524e-04
Loss = 3.8171e-02, PNorm = 172.3361, GNorm = 0.4497, lr_0 = 4.0496e-04
Loss = 1.8309e-02, PNorm = 172.3601, GNorm = 0.1707, lr_0 = 4.0468e-04
Validation mae = 0.121800
Epoch 13
Loss = 1.8769e-02, PNorm = 172.3816, GNorm = 0.2762, lr_0 = 4.0440e-04
Loss = 2.5136e-02, PNorm = 172.3933, GNorm = 0.4433, lr_0 = 4.0413e-04
Loss = 2.1172e-02, PNorm = 172.4109, GNorm = 0.3498, lr_0 = 4.0385e-04
Loss = 1.9561e-02, PNorm = 172.4340, GNorm = 0.3548, lr_0 = 4.0357e-04
Loss = 2.2695e-02, PNorm = 172.4566, GNorm = 0.1694, lr_0 = 4.0330e-04
Loss = 1.7964e-02, PNorm = 172.4763, GNorm = 0.2874, lr_0 = 4.0302e-04
Loss = 1.6219e-02, PNorm = 172.4965, GNorm = 0.3467, lr_0 = 4.0274e-04
Loss = 1.7129e-02, PNorm = 172.5147, GNorm = 0.3042, lr_0 = 4.0247e-04
Loss = 1.5215e-02, PNorm = 172.5379, GNorm = 0.2916, lr_0 = 4.0219e-04
Loss = 1.8378e-02, PNorm = 172.5594, GNorm = 0.1892, lr_0 = 4.0192e-04
Loss = 1.8493e-02, PNorm = 172.5765, GNorm = 0.2123, lr_0 = 4.0164e-04
Loss = 1.8004e-02, PNorm = 172.5963, GNorm = 0.4210, lr_0 = 4.0137e-04
Loss = 1.6948e-02, PNorm = 172.6200, GNorm = 0.5576, lr_0 = 4.0109e-04
Loss = 1.6689e-02, PNorm = 172.6443, GNorm = 0.3517, lr_0 = 4.0082e-04
Loss = 1.4471e-02, PNorm = 172.6607, GNorm = 0.2600, lr_0 = 4.0054e-04
Loss = 2.0875e-02, PNorm = 172.6764, GNorm = 0.3091, lr_0 = 4.0027e-04
Loss = 1.4823e-02, PNorm = 172.6933, GNorm = 0.3323, lr_0 = 3.9999e-04
Loss = 1.4821e-02, PNorm = 172.7093, GNorm = 0.1648, lr_0 = 3.9972e-04
Loss = 1.6130e-02, PNorm = 172.7243, GNorm = 0.3646, lr_0 = 3.9945e-04
Loss = 1.4776e-02, PNorm = 172.7439, GNorm = 0.4165, lr_0 = 3.9917e-04
Loss = 1.8909e-02, PNorm = 172.7613, GNorm = 0.2549, lr_0 = 3.9890e-04
Loss = 1.7681e-02, PNorm = 172.7801, GNorm = 0.4592, lr_0 = 3.9863e-04
Loss = 1.7124e-02, PNorm = 172.7981, GNorm = 0.1897, lr_0 = 3.9835e-04
Loss = 1.3350e-02, PNorm = 172.8105, GNorm = 0.3733, lr_0 = 3.9808e-04
Loss = 1.3482e-02, PNorm = 172.8245, GNorm = 0.3745, lr_0 = 3.9781e-04
Loss = 1.5328e-02, PNorm = 172.8382, GNorm = 0.3887, lr_0 = 3.9753e-04
Loss = 1.3527e-02, PNorm = 172.8547, GNorm = 0.4861, lr_0 = 3.9726e-04
Loss = 1.6367e-02, PNorm = 172.8693, GNorm = 0.1984, lr_0 = 3.9699e-04
Loss = 1.6706e-02, PNorm = 172.8839, GNorm = 0.2196, lr_0 = 3.9672e-04
Loss = 1.3567e-02, PNorm = 172.9018, GNorm = 0.2153, lr_0 = 3.9645e-04
Loss = 1.6648e-02, PNorm = 172.9183, GNorm = 0.2816, lr_0 = 3.9617e-04
Loss = 1.5259e-02, PNorm = 172.9371, GNorm = 0.2112, lr_0 = 3.9590e-04
Loss = 1.4008e-02, PNorm = 172.9547, GNorm = 0.2962, lr_0 = 3.9563e-04
Loss = 1.2160e-02, PNorm = 172.9699, GNorm = 0.2516, lr_0 = 3.9536e-04
Loss = 1.2366e-02, PNorm = 172.9851, GNorm = 0.1684, lr_0 = 3.9509e-04
Loss = 1.6513e-02, PNorm = 172.9992, GNorm = 0.6160, lr_0 = 3.9482e-04
Loss = 1.6282e-02, PNorm = 173.0162, GNorm = 0.2075, lr_0 = 3.9455e-04
Loss = 1.3247e-02, PNorm = 173.0331, GNorm = 0.2265, lr_0 = 3.9428e-04
Loss = 1.3788e-02, PNorm = 173.0498, GNorm = 0.3972, lr_0 = 3.9401e-04
Loss = 1.5624e-02, PNorm = 173.0655, GNorm = 0.1916, lr_0 = 3.9374e-04
Loss = 1.2734e-02, PNorm = 173.0818, GNorm = 0.2304, lr_0 = 3.9347e-04
Loss = 1.5706e-02, PNorm = 173.1001, GNorm = 0.5380, lr_0 = 3.9320e-04
Loss = 2.5273e-02, PNorm = 173.1209, GNorm = 1.0631, lr_0 = 3.9293e-04
Loss = 1.7109e-02, PNorm = 173.1355, GNorm = 0.1880, lr_0 = 3.9266e-04
Loss = 1.8790e-02, PNorm = 173.1478, GNorm = 0.1645, lr_0 = 3.9239e-04
Loss = 2.1432e-02, PNorm = 173.1658, GNorm = 0.5194, lr_0 = 3.9212e-04
Loss = 1.1107e-02, PNorm = 173.1833, GNorm = 0.1642, lr_0 = 3.9185e-04
Loss = 1.3042e-02, PNorm = 173.2010, GNorm = 0.4911, lr_0 = 3.9159e-04
Loss = 1.4082e-02, PNorm = 173.2189, GNorm = 0.2239, lr_0 = 3.9132e-04
Loss = 1.6315e-02, PNorm = 173.2355, GNorm = 0.5032, lr_0 = 3.9105e-04
Loss = 1.2052e-02, PNorm = 173.2557, GNorm = 0.1752, lr_0 = 3.9078e-04
Loss = 1.4284e-02, PNorm = 173.2720, GNorm = 0.2640, lr_0 = 3.9051e-04
Loss = 1.3612e-02, PNorm = 173.2878, GNorm = 0.1854, lr_0 = 3.9025e-04
Loss = 2.6437e-02, PNorm = 173.3068, GNorm = 0.4211, lr_0 = 3.8998e-04
Loss = 1.4403e-02, PNorm = 173.3221, GNorm = 0.2973, lr_0 = 3.8971e-04
Loss = 1.8287e-02, PNorm = 173.3378, GNorm = 0.1425, lr_0 = 3.8945e-04
Loss = 2.0445e-02, PNorm = 173.3520, GNorm = 0.4190, lr_0 = 3.8918e-04
Loss = 1.9033e-02, PNorm = 173.3671, GNorm = 0.5006, lr_0 = 3.8891e-04
Loss = 1.3047e-02, PNorm = 173.3852, GNorm = 0.1971, lr_0 = 3.8865e-04
Loss = 1.6584e-02, PNorm = 173.4032, GNorm = 0.6353, lr_0 = 3.8838e-04
Loss = 1.4256e-02, PNorm = 173.4252, GNorm = 0.2479, lr_0 = 3.8811e-04
Loss = 1.4294e-02, PNorm = 173.4444, GNorm = 0.2842, lr_0 = 3.8785e-04
Loss = 2.0841e-02, PNorm = 173.4638, GNorm = 0.2243, lr_0 = 3.8758e-04
Loss = 1.9457e-02, PNorm = 173.4813, GNorm = 0.1204, lr_0 = 3.8732e-04
Loss = 1.5572e-02, PNorm = 173.4982, GNorm = 0.1713, lr_0 = 3.8705e-04
Loss = 1.1972e-02, PNorm = 173.5149, GNorm = 0.3966, lr_0 = 3.8679e-04
Loss = 2.5901e-02, PNorm = 173.5316, GNorm = 0.3375, lr_0 = 3.8652e-04
Loss = 1.1635e-02, PNorm = 173.5472, GNorm = 0.1573, lr_0 = 3.8626e-04
Loss = 1.2635e-02, PNorm = 173.5637, GNorm = 0.2049, lr_0 = 3.8599e-04
Loss = 1.6408e-02, PNorm = 173.5822, GNorm = 0.1488, lr_0 = 3.8573e-04
Loss = 1.5229e-02, PNorm = 173.6013, GNorm = 0.2733, lr_0 = 3.8546e-04
Loss = 1.6305e-02, PNorm = 173.6202, GNorm = 0.2337, lr_0 = 3.8520e-04
Loss = 1.8034e-02, PNorm = 173.6408, GNorm = 0.1942, lr_0 = 3.8493e-04
Loss = 1.4402e-02, PNorm = 173.6605, GNorm = 0.2735, lr_0 = 3.8467e-04
Loss = 1.2607e-02, PNorm = 173.6803, GNorm = 0.2882, lr_0 = 3.8441e-04
Loss = 2.1788e-02, PNorm = 173.7038, GNorm = 0.8664, lr_0 = 3.8414e-04
Loss = 2.2431e-02, PNorm = 173.7221, GNorm = 0.3030, lr_0 = 3.8388e-04
Loss = 1.8297e-02, PNorm = 173.7447, GNorm = 0.2471, lr_0 = 3.8362e-04
Loss = 1.5086e-02, PNorm = 173.7608, GNorm = 0.4114, lr_0 = 3.8336e-04
Loss = 1.3912e-02, PNorm = 173.7780, GNorm = 0.1419, lr_0 = 3.8309e-04
Loss = 1.4876e-02, PNorm = 173.7978, GNorm = 0.2447, lr_0 = 3.8283e-04
Loss = 1.8274e-02, PNorm = 173.8189, GNorm = 0.2349, lr_0 = 3.8257e-04
Loss = 1.5063e-02, PNorm = 173.8409, GNorm = 0.3219, lr_0 = 3.8231e-04
Loss = 1.8598e-02, PNorm = 173.8623, GNorm = 0.2619, lr_0 = 3.8204e-04
Loss = 1.7950e-02, PNorm = 173.8796, GNorm = 0.2268, lr_0 = 3.8178e-04
Loss = 1.3281e-02, PNorm = 173.8977, GNorm = 0.1497, lr_0 = 3.8152e-04
Loss = 1.4259e-02, PNorm = 173.9139, GNorm = 0.2631, lr_0 = 3.8126e-04
Loss = 1.4188e-02, PNorm = 173.9318, GNorm = 0.2067, lr_0 = 3.8100e-04
Loss = 1.2712e-02, PNorm = 173.9493, GNorm = 0.1946, lr_0 = 3.8074e-04
Loss = 1.8148e-02, PNorm = 173.9731, GNorm = 0.2846, lr_0 = 3.8048e-04
Loss = 1.9840e-02, PNorm = 173.9966, GNorm = 0.4613, lr_0 = 3.8022e-04
Loss = 1.2264e-02, PNorm = 174.0152, GNorm = 0.2399, lr_0 = 3.7995e-04
Loss = 2.4749e-02, PNorm = 174.0364, GNorm = 0.1787, lr_0 = 3.7969e-04
Loss = 1.4215e-02, PNorm = 174.0521, GNorm = 0.2474, lr_0 = 3.7943e-04
Loss = 1.3442e-02, PNorm = 174.0705, GNorm = 0.4835, lr_0 = 3.7917e-04
Loss = 1.6969e-02, PNorm = 174.0932, GNorm = 0.1892, lr_0 = 3.7891e-04
Loss = 1.5378e-02, PNorm = 174.1130, GNorm = 0.4858, lr_0 = 3.7866e-04
Loss = 1.3701e-02, PNorm = 174.1380, GNorm = 0.3552, lr_0 = 3.7840e-04
Loss = 1.4175e-02, PNorm = 174.1575, GNorm = 0.1732, lr_0 = 3.7814e-04
Loss = 1.7282e-02, PNorm = 174.1756, GNorm = 0.3170, lr_0 = 3.7788e-04
Loss = 1.3292e-02, PNorm = 174.1953, GNorm = 0.4656, lr_0 = 3.7762e-04
Loss = 1.8193e-02, PNorm = 174.2124, GNorm = 0.3991, lr_0 = 3.7736e-04
Loss = 1.5706e-02, PNorm = 174.2318, GNorm = 0.2072, lr_0 = 3.7710e-04
Loss = 1.4998e-02, PNorm = 174.2542, GNorm = 0.2621, lr_0 = 3.7684e-04
Loss = 2.1557e-02, PNorm = 174.2733, GNorm = 0.1960, lr_0 = 3.7659e-04
Loss = 1.2038e-02, PNorm = 174.2927, GNorm = 0.1607, lr_0 = 3.7633e-04
Loss = 2.2228e-02, PNorm = 174.3096, GNorm = 0.2108, lr_0 = 3.7607e-04
Loss = 2.1296e-02, PNorm = 174.3284, GNorm = 0.3593, lr_0 = 3.7581e-04
Loss = 1.6342e-02, PNorm = 174.3505, GNorm = 0.4705, lr_0 = 3.7555e-04
Loss = 2.0219e-02, PNorm = 174.3731, GNorm = 0.3752, lr_0 = 3.7530e-04
Loss = 1.7715e-02, PNorm = 174.4024, GNorm = 0.2451, lr_0 = 3.7504e-04
Loss = 1.6550e-02, PNorm = 174.4287, GNorm = 0.2104, lr_0 = 3.7478e-04
Loss = 1.6729e-02, PNorm = 174.4522, GNorm = 0.2760, lr_0 = 3.7453e-04
Loss = 1.3395e-02, PNorm = 174.4702, GNorm = 0.2574, lr_0 = 3.7427e-04
Loss = 1.5068e-02, PNorm = 174.4856, GNorm = 0.2226, lr_0 = 3.7401e-04
Loss = 1.5922e-02, PNorm = 174.5028, GNorm = 0.2219, lr_0 = 3.7376e-04
Loss = 1.4664e-02, PNorm = 174.5209, GNorm = 0.1916, lr_0 = 3.7350e-04
Loss = 1.6971e-02, PNorm = 174.5399, GNorm = 0.9381, lr_0 = 3.7325e-04
Loss = 1.4584e-02, PNorm = 174.5628, GNorm = 0.5749, lr_0 = 3.7299e-04
Loss = 1.5648e-02, PNorm = 174.5850, GNorm = 0.7959, lr_0 = 3.7273e-04
Validation mae = 0.121380
Epoch 14
Loss = 1.4180e-02, PNorm = 174.6058, GNorm = 0.1969, lr_0 = 3.7248e-04
Loss = 1.2053e-02, PNorm = 174.6203, GNorm = 0.2897, lr_0 = 3.7222e-04
Loss = 1.2172e-02, PNorm = 174.6335, GNorm = 0.1585, lr_0 = 3.7197e-04
Loss = 1.3024e-02, PNorm = 174.6462, GNorm = 0.2026, lr_0 = 3.7171e-04
Loss = 1.3071e-02, PNorm = 174.6587, GNorm = 0.4631, lr_0 = 3.7146e-04
Loss = 1.4369e-02, PNorm = 174.6702, GNorm = 0.1850, lr_0 = 3.7120e-04
Loss = 1.5613e-02, PNorm = 174.6823, GNorm = 0.2593, lr_0 = 3.7095e-04
Loss = 1.4466e-02, PNorm = 174.6942, GNorm = 0.2730, lr_0 = 3.7070e-04
Loss = 1.4926e-02, PNorm = 174.7092, GNorm = 0.2565, lr_0 = 3.7044e-04
Loss = 1.8868e-02, PNorm = 174.7211, GNorm = 0.8741, lr_0 = 3.7019e-04
Loss = 1.2962e-02, PNorm = 174.7354, GNorm = 0.6211, lr_0 = 3.6993e-04
Loss = 1.4850e-02, PNorm = 174.7521, GNorm = 0.2397, lr_0 = 3.6968e-04
Loss = 1.6474e-02, PNorm = 174.7650, GNorm = 0.9365, lr_0 = 3.6943e-04
Loss = 1.3161e-02, PNorm = 174.7782, GNorm = 0.2517, lr_0 = 3.6917e-04
Loss = 1.8527e-02, PNorm = 174.7945, GNorm = 0.2138, lr_0 = 3.6892e-04
Loss = 1.2177e-02, PNorm = 174.8082, GNorm = 0.2467, lr_0 = 3.6867e-04
Loss = 1.4156e-02, PNorm = 174.8199, GNorm = 0.2082, lr_0 = 3.6842e-04
Loss = 1.5165e-02, PNorm = 174.8320, GNorm = 0.2023, lr_0 = 3.6816e-04
Loss = 1.2075e-02, PNorm = 174.8468, GNorm = 0.1658, lr_0 = 3.6791e-04
Loss = 1.6299e-02, PNorm = 174.8624, GNorm = 0.1541, lr_0 = 3.6766e-04
Loss = 1.0934e-02, PNorm = 174.8756, GNorm = 0.3956, lr_0 = 3.6741e-04
Loss = 1.2241e-02, PNorm = 174.8884, GNorm = 0.4100, lr_0 = 3.6716e-04
Loss = 1.2688e-02, PNorm = 174.8987, GNorm = 0.4382, lr_0 = 3.6690e-04
Loss = 1.4586e-02, PNorm = 174.9130, GNorm = 0.4682, lr_0 = 3.6665e-04
Loss = 1.7303e-02, PNorm = 174.9265, GNorm = 0.2934, lr_0 = 3.6640e-04
Loss = 1.0967e-02, PNorm = 174.9431, GNorm = 0.2802, lr_0 = 3.6615e-04
Loss = 1.3332e-02, PNorm = 174.9592, GNorm = 0.2468, lr_0 = 3.6590e-04
Loss = 1.0780e-02, PNorm = 174.9758, GNorm = 0.2341, lr_0 = 3.6565e-04
Loss = 1.3942e-02, PNorm = 174.9897, GNorm = 0.2852, lr_0 = 3.6540e-04
Loss = 1.1448e-02, PNorm = 175.0039, GNorm = 0.3913, lr_0 = 3.6515e-04
Loss = 1.8619e-02, PNorm = 175.0166, GNorm = 0.3099, lr_0 = 3.6490e-04
Loss = 1.2814e-02, PNorm = 175.0307, GNorm = 0.3040, lr_0 = 3.6465e-04
Loss = 2.0057e-02, PNorm = 175.0446, GNorm = 0.1690, lr_0 = 3.6440e-04
Loss = 2.0632e-02, PNorm = 175.0584, GNorm = 0.2660, lr_0 = 3.6415e-04
Loss = 1.7498e-02, PNorm = 175.0722, GNorm = 0.4903, lr_0 = 3.6390e-04
Loss = 1.4035e-02, PNorm = 175.0845, GNorm = 0.2534, lr_0 = 3.6365e-04
Loss = 1.4384e-02, PNorm = 175.0987, GNorm = 0.8344, lr_0 = 3.6340e-04
Loss = 1.7401e-02, PNorm = 175.1158, GNorm = 0.1770, lr_0 = 3.6315e-04
Loss = 1.8787e-02, PNorm = 175.1312, GNorm = 0.3044, lr_0 = 3.6290e-04
Loss = 1.3911e-02, PNorm = 175.1470, GNorm = 0.1661, lr_0 = 3.6266e-04
Loss = 1.1292e-02, PNorm = 175.1637, GNorm = 0.1324, lr_0 = 3.6241e-04
Loss = 1.4755e-02, PNorm = 175.1798, GNorm = 0.1729, lr_0 = 3.6216e-04
Loss = 1.1235e-02, PNorm = 175.1957, GNorm = 0.2532, lr_0 = 3.6191e-04
Loss = 1.4628e-02, PNorm = 175.2111, GNorm = 0.2550, lr_0 = 3.6166e-04
Loss = 1.3418e-02, PNorm = 175.2240, GNorm = 0.3755, lr_0 = 3.6141e-04
Loss = 1.4832e-02, PNorm = 175.2396, GNorm = 0.6291, lr_0 = 3.6117e-04
Loss = 1.1870e-02, PNorm = 175.2549, GNorm = 0.2019, lr_0 = 3.6092e-04
Loss = 1.7962e-02, PNorm = 175.2732, GNorm = 0.2391, lr_0 = 3.6067e-04
Loss = 1.5410e-02, PNorm = 175.2909, GNorm = 0.2697, lr_0 = 3.6043e-04
Loss = 1.2560e-02, PNorm = 175.3054, GNorm = 0.2586, lr_0 = 3.6018e-04
Loss = 1.2725e-02, PNorm = 175.3181, GNorm = 0.1468, lr_0 = 3.5993e-04
Loss = 1.1201e-02, PNorm = 175.3342, GNorm = 0.3187, lr_0 = 3.5969e-04
Loss = 1.1329e-02, PNorm = 175.3511, GNorm = 0.2121, lr_0 = 3.5944e-04
Loss = 8.8182e-03, PNorm = 175.3674, GNorm = 0.1233, lr_0 = 3.5919e-04
Loss = 1.4578e-02, PNorm = 175.3833, GNorm = 0.1955, lr_0 = 3.5895e-04
Loss = 1.4292e-02, PNorm = 175.3992, GNorm = 0.1500, lr_0 = 3.5870e-04
Loss = 1.1691e-02, PNorm = 175.4122, GNorm = 0.2323, lr_0 = 3.5845e-04
Loss = 1.9834e-02, PNorm = 175.4276, GNorm = 0.4221, lr_0 = 3.5821e-04
Loss = 1.2673e-02, PNorm = 175.4441, GNorm = 0.2271, lr_0 = 3.5796e-04
Loss = 1.5648e-02, PNorm = 175.4647, GNorm = 0.2875, lr_0 = 3.5772e-04
Loss = 1.1209e-02, PNorm = 175.4812, GNorm = 0.2822, lr_0 = 3.5747e-04
Loss = 1.3171e-02, PNorm = 175.4953, GNorm = 0.2638, lr_0 = 3.5723e-04
Loss = 1.3049e-02, PNorm = 175.5077, GNorm = 0.2600, lr_0 = 3.5698e-04
Loss = 1.4126e-02, PNorm = 175.5257, GNorm = 0.2002, lr_0 = 3.5674e-04
Loss = 1.0412e-02, PNorm = 175.5412, GNorm = 0.1692, lr_0 = 3.5650e-04
Loss = 1.9325e-02, PNorm = 175.5562, GNorm = 0.5669, lr_0 = 3.5625e-04
Loss = 1.0881e-02, PNorm = 175.5718, GNorm = 0.4046, lr_0 = 3.5601e-04
Loss = 1.3086e-02, PNorm = 175.5870, GNorm = 0.2560, lr_0 = 3.5576e-04
Loss = 1.2394e-02, PNorm = 175.6026, GNorm = 0.1691, lr_0 = 3.5552e-04
Loss = 1.1880e-02, PNorm = 175.6186, GNorm = 0.4675, lr_0 = 3.5528e-04
Loss = 1.5929e-02, PNorm = 175.6338, GNorm = 0.3340, lr_0 = 3.5503e-04
Loss = 1.2265e-02, PNorm = 175.6470, GNorm = 0.2068, lr_0 = 3.5479e-04
Loss = 9.4086e-03, PNorm = 175.6608, GNorm = 0.3398, lr_0 = 3.5455e-04
Loss = 1.5171e-02, PNorm = 175.6768, GNorm = 0.3440, lr_0 = 3.5430e-04
Loss = 1.3298e-02, PNorm = 175.6935, GNorm = 0.4272, lr_0 = 3.5406e-04
Loss = 8.4480e-03, PNorm = 175.7113, GNorm = 0.2912, lr_0 = 3.5382e-04
Loss = 1.1169e-02, PNorm = 175.7258, GNorm = 0.1869, lr_0 = 3.5358e-04
Loss = 1.1143e-02, PNorm = 175.7388, GNorm = 0.4177, lr_0 = 3.5333e-04
Loss = 1.5417e-02, PNorm = 175.7545, GNorm = 0.1910, lr_0 = 3.5309e-04
Loss = 1.7958e-02, PNorm = 175.7706, GNorm = 0.4942, lr_0 = 3.5285e-04
Loss = 1.5127e-02, PNorm = 175.7871, GNorm = 0.4927, lr_0 = 3.5261e-04
Loss = 1.2247e-02, PNorm = 175.8030, GNorm = 0.1731, lr_0 = 3.5237e-04
Loss = 1.3479e-02, PNorm = 175.8202, GNorm = 0.2562, lr_0 = 3.5212e-04
Loss = 1.2890e-02, PNorm = 175.8362, GNorm = 0.2755, lr_0 = 3.5188e-04
Loss = 1.2675e-02, PNorm = 175.8511, GNorm = 0.2900, lr_0 = 3.5164e-04
Loss = 1.6725e-02, PNorm = 175.8669, GNorm = 0.3006, lr_0 = 3.5140e-04
Loss = 1.2348e-02, PNorm = 175.8822, GNorm = 0.4694, lr_0 = 3.5116e-04
Loss = 1.3530e-02, PNorm = 175.8991, GNorm = 0.2358, lr_0 = 3.5092e-04
Loss = 1.1174e-02, PNorm = 175.9185, GNorm = 0.1669, lr_0 = 3.5068e-04
Loss = 1.6195e-02, PNorm = 175.9361, GNorm = 0.3025, lr_0 = 3.5044e-04
Loss = 1.9046e-02, PNorm = 175.9531, GNorm = 0.2886, lr_0 = 3.5020e-04
Loss = 1.3286e-02, PNorm = 175.9720, GNorm = 0.2061, lr_0 = 3.4996e-04
Loss = 1.0608e-02, PNorm = 175.9892, GNorm = 0.1812, lr_0 = 3.4972e-04
Loss = 1.4822e-02, PNorm = 176.0089, GNorm = 0.1657, lr_0 = 3.4948e-04
Loss = 2.2905e-02, PNorm = 176.0260, GNorm = 0.1554, lr_0 = 3.4924e-04
Loss = 1.2309e-02, PNorm = 176.0451, GNorm = 0.1548, lr_0 = 3.4900e-04
Loss = 2.7754e-02, PNorm = 176.0620, GNorm = 2.9454, lr_0 = 3.4876e-04
Loss = 1.1837e-02, PNorm = 176.0810, GNorm = 0.1100, lr_0 = 3.4852e-04
Loss = 1.0598e-02, PNorm = 176.0963, GNorm = 0.2043, lr_0 = 3.4828e-04
Loss = 1.3020e-02, PNorm = 176.1165, GNorm = 0.2666, lr_0 = 3.4805e-04
Loss = 1.8081e-02, PNorm = 176.1350, GNorm = 1.7544, lr_0 = 3.4781e-04
Loss = 1.3553e-02, PNorm = 176.1481, GNorm = 0.4294, lr_0 = 3.4757e-04
Loss = 1.4432e-02, PNorm = 176.1643, GNorm = 0.3387, lr_0 = 3.4733e-04
Loss = 1.2380e-02, PNorm = 176.1799, GNorm = 0.1568, lr_0 = 3.4709e-04
Loss = 1.3686e-02, PNorm = 176.1995, GNorm = 0.3760, lr_0 = 3.4686e-04
Loss = 1.5593e-02, PNorm = 176.2186, GNorm = 0.2002, lr_0 = 3.4662e-04
Loss = 1.2104e-02, PNorm = 176.2369, GNorm = 0.3594, lr_0 = 3.4638e-04
Loss = 1.0864e-02, PNorm = 176.2516, GNorm = 0.2361, lr_0 = 3.4614e-04
Loss = 1.4409e-02, PNorm = 176.2672, GNorm = 0.3139, lr_0 = 3.4591e-04
Loss = 1.4998e-02, PNorm = 176.2843, GNorm = 0.4985, lr_0 = 3.4567e-04
Loss = 1.2226e-02, PNorm = 176.2997, GNorm = 0.1568, lr_0 = 3.4543e-04
Loss = 1.2986e-02, PNorm = 176.3147, GNorm = 0.3858, lr_0 = 3.4520e-04
Loss = 1.1780e-02, PNorm = 176.3313, GNorm = 0.2848, lr_0 = 3.4496e-04
Loss = 1.1694e-02, PNorm = 176.3471, GNorm = 0.2825, lr_0 = 3.4472e-04
Loss = 1.1220e-02, PNorm = 176.3655, GNorm = 0.3332, lr_0 = 3.4449e-04
Loss = 1.2202e-02, PNorm = 176.3818, GNorm = 0.3501, lr_0 = 3.4425e-04
Loss = 1.5557e-02, PNorm = 176.3982, GNorm = 0.3479, lr_0 = 3.4402e-04
Loss = 1.4473e-02, PNorm = 176.4176, GNorm = 0.4831, lr_0 = 3.4378e-04
Loss = 1.0866e-02, PNorm = 176.4351, GNorm = 0.3986, lr_0 = 3.4354e-04
Loss = 1.2458e-02, PNorm = 176.4512, GNorm = 0.3979, lr_0 = 3.4331e-04
Validation mae = 0.121567
Epoch 15
Loss = 9.9303e-03, PNorm = 176.4632, GNorm = 0.1840, lr_0 = 3.4307e-04
Loss = 1.7111e-02, PNorm = 176.4746, GNorm = 0.1866, lr_0 = 3.4284e-04
Loss = 1.3994e-02, PNorm = 176.4877, GNorm = 0.3280, lr_0 = 3.4260e-04
Loss = 1.0446e-02, PNorm = 176.5007, GNorm = 0.1776, lr_0 = 3.4237e-04
Loss = 1.1915e-02, PNorm = 176.5106, GNorm = 0.2912, lr_0 = 3.4213e-04
Loss = 1.4654e-02, PNorm = 176.5216, GNorm = 0.3524, lr_0 = 3.4190e-04
Loss = 1.0246e-02, PNorm = 176.5364, GNorm = 0.1607, lr_0 = 3.4167e-04
Loss = 8.4500e-03, PNorm = 176.5464, GNorm = 0.3329, lr_0 = 3.4143e-04
Loss = 1.0830e-02, PNorm = 176.5562, GNorm = 0.2897, lr_0 = 3.4120e-04
Loss = 1.4911e-02, PNorm = 176.5668, GNorm = 0.7709, lr_0 = 3.4096e-04
Loss = 1.0351e-02, PNorm = 176.5787, GNorm = 0.2994, lr_0 = 3.4073e-04
Loss = 9.4801e-03, PNorm = 176.5916, GNorm = 0.1980, lr_0 = 3.4050e-04
Loss = 1.0037e-02, PNorm = 176.6039, GNorm = 0.2897, lr_0 = 3.4026e-04
Loss = 1.0717e-02, PNorm = 176.6137, GNorm = 0.1528, lr_0 = 3.4003e-04
Loss = 1.5332e-02, PNorm = 176.6263, GNorm = 0.2021, lr_0 = 3.3980e-04
Loss = 1.0894e-02, PNorm = 176.6363, GNorm = 0.2255, lr_0 = 3.3956e-04
Loss = 1.1424e-02, PNorm = 176.6497, GNorm = 0.3003, lr_0 = 3.3933e-04
Loss = 1.0219e-02, PNorm = 176.6625, GNorm = 0.1167, lr_0 = 3.3910e-04
Loss = 1.6831e-02, PNorm = 176.6732, GNorm = 1.4448, lr_0 = 3.3887e-04
Loss = 1.1743e-02, PNorm = 176.6851, GNorm = 0.1995, lr_0 = 3.3864e-04
Loss = 1.2605e-02, PNorm = 176.6982, GNorm = 0.2330, lr_0 = 3.3840e-04
Loss = 1.3087e-02, PNorm = 176.7096, GNorm = 0.4557, lr_0 = 3.3817e-04
Loss = 1.1418e-02, PNorm = 176.7237, GNorm = 0.2310, lr_0 = 3.3794e-04
Loss = 1.4035e-02, PNorm = 176.7377, GNorm = 0.4015, lr_0 = 3.3771e-04
Loss = 9.7016e-03, PNorm = 176.7514, GNorm = 0.2075, lr_0 = 3.3748e-04
Loss = 1.5462e-02, PNorm = 176.7652, GNorm = 0.1860, lr_0 = 3.3725e-04
Loss = 1.0216e-02, PNorm = 176.7798, GNorm = 0.2316, lr_0 = 3.3701e-04
Loss = 1.8313e-02, PNorm = 176.7951, GNorm = 1.2448, lr_0 = 3.3678e-04
Loss = 1.1063e-02, PNorm = 176.8108, GNorm = 0.1310, lr_0 = 3.3655e-04
Loss = 1.2625e-02, PNorm = 176.8267, GNorm = 0.3615, lr_0 = 3.3632e-04
Loss = 1.0048e-02, PNorm = 176.8405, GNorm = 0.2912, lr_0 = 3.3609e-04
Loss = 1.1803e-02, PNorm = 176.8523, GNorm = 0.4356, lr_0 = 3.3586e-04
Loss = 9.7686e-03, PNorm = 176.8642, GNorm = 0.1771, lr_0 = 3.3563e-04
Loss = 1.2684e-02, PNorm = 176.8772, GNorm = 0.1901, lr_0 = 3.3540e-04
Loss = 1.2344e-02, PNorm = 176.8885, GNorm = 0.3288, lr_0 = 3.3517e-04
Loss = 1.0031e-02, PNorm = 176.9002, GNorm = 0.1432, lr_0 = 3.3494e-04
Loss = 8.6370e-03, PNorm = 176.9133, GNorm = 0.1361, lr_0 = 3.3471e-04
Loss = 9.7481e-03, PNorm = 176.9242, GNorm = 0.1370, lr_0 = 3.3448e-04
Loss = 1.1995e-02, PNorm = 176.9372, GNorm = 0.2590, lr_0 = 3.3425e-04
Loss = 9.4499e-03, PNorm = 176.9501, GNorm = 0.3671, lr_0 = 3.3403e-04
Loss = 1.1768e-02, PNorm = 176.9626, GNorm = 0.2167, lr_0 = 3.3380e-04
Loss = 1.3305e-02, PNorm = 176.9748, GNorm = 0.4110, lr_0 = 3.3357e-04
Loss = 1.1280e-02, PNorm = 176.9912, GNorm = 0.2264, lr_0 = 3.3334e-04
Loss = 1.9140e-02, PNorm = 176.9991, GNorm = 0.3176, lr_0 = 3.3311e-04
Loss = 2.3226e-02, PNorm = 177.0143, GNorm = 0.2047, lr_0 = 3.3288e-04
Loss = 1.1945e-02, PNorm = 177.0290, GNorm = 0.2592, lr_0 = 3.3265e-04
Loss = 1.0179e-02, PNorm = 177.0440, GNorm = 0.2378, lr_0 = 3.3243e-04
Loss = 1.1608e-02, PNorm = 177.0570, GNorm = 0.2294, lr_0 = 3.3220e-04
Loss = 9.4659e-03, PNorm = 177.0703, GNorm = 0.3379, lr_0 = 3.3197e-04
Loss = 1.1932e-02, PNorm = 177.0808, GNorm = 0.2197, lr_0 = 3.3174e-04
Loss = 1.4512e-02, PNorm = 177.0961, GNorm = 0.2126, lr_0 = 3.3152e-04
Loss = 9.9545e-03, PNorm = 177.1115, GNorm = 0.1888, lr_0 = 3.3129e-04
Loss = 1.2225e-02, PNorm = 177.1279, GNorm = 0.1769, lr_0 = 3.3106e-04
Loss = 1.4893e-02, PNorm = 177.1403, GNorm = 0.3236, lr_0 = 3.3084e-04
Loss = 1.2953e-02, PNorm = 177.1544, GNorm = 0.2087, lr_0 = 3.3061e-04
Loss = 1.4402e-02, PNorm = 177.1659, GNorm = 0.2342, lr_0 = 3.3038e-04
Loss = 1.4787e-02, PNorm = 177.1754, GNorm = 0.2147, lr_0 = 3.3016e-04
Loss = 9.8987e-03, PNorm = 177.1878, GNorm = 0.2428, lr_0 = 3.2993e-04
Loss = 1.4398e-02, PNorm = 177.2038, GNorm = 0.2396, lr_0 = 3.2970e-04
Loss = 1.1023e-02, PNorm = 177.2175, GNorm = 0.4792, lr_0 = 3.2948e-04
Loss = 9.8649e-03, PNorm = 177.2321, GNorm = 0.2081, lr_0 = 3.2925e-04
Loss = 1.2167e-02, PNorm = 177.2436, GNorm = 0.2159, lr_0 = 3.2903e-04
Loss = 2.2812e-02, PNorm = 177.2588, GNorm = 0.5338, lr_0 = 3.2880e-04
Loss = 1.2099e-02, PNorm = 177.2720, GNorm = 0.3192, lr_0 = 3.2858e-04
Loss = 1.9161e-02, PNorm = 177.2846, GNorm = 0.1941, lr_0 = 3.2835e-04
Loss = 9.5978e-03, PNorm = 177.2996, GNorm = 0.2976, lr_0 = 3.2813e-04
Loss = 1.1216e-02, PNorm = 177.3122, GNorm = 0.5757, lr_0 = 3.2790e-04
Loss = 1.2175e-02, PNorm = 177.3251, GNorm = 0.2481, lr_0 = 3.2768e-04
Loss = 1.0547e-02, PNorm = 177.3367, GNorm = 0.1545, lr_0 = 3.2745e-04
Loss = 1.1719e-02, PNorm = 177.3484, GNorm = 0.1690, lr_0 = 3.2723e-04
Loss = 1.2809e-02, PNorm = 177.3638, GNorm = 0.2582, lr_0 = 3.2700e-04
Loss = 9.8567e-03, PNorm = 177.3782, GNorm = 0.2154, lr_0 = 3.2678e-04
Loss = 1.2767e-02, PNorm = 177.3932, GNorm = 0.2984, lr_0 = 3.2656e-04
Loss = 1.8025e-02, PNorm = 177.4052, GNorm = 0.5207, lr_0 = 3.2633e-04
Loss = 1.1395e-02, PNorm = 177.4212, GNorm = 0.3395, lr_0 = 3.2611e-04
Loss = 1.0660e-02, PNorm = 177.4361, GNorm = 0.2166, lr_0 = 3.2589e-04
Loss = 1.3745e-02, PNorm = 177.4513, GNorm = 0.1602, lr_0 = 3.2566e-04
Loss = 1.3450e-02, PNorm = 177.4645, GNorm = 0.2281, lr_0 = 3.2544e-04
Loss = 1.2523e-02, PNorm = 177.4775, GNorm = 0.2986, lr_0 = 3.2522e-04
Loss = 1.2624e-02, PNorm = 177.4899, GNorm = 0.3166, lr_0 = 3.2499e-04
Loss = 1.0086e-02, PNorm = 177.5063, GNorm = 0.1435, lr_0 = 3.2477e-04
Loss = 1.2595e-02, PNorm = 177.5218, GNorm = 0.2257, lr_0 = 3.2455e-04
Loss = 1.3134e-02, PNorm = 177.5347, GNorm = 0.2210, lr_0 = 3.2433e-04
Loss = 9.1226e-03, PNorm = 177.5461, GNorm = 0.2352, lr_0 = 3.2410e-04
Loss = 8.2266e-03, PNorm = 177.5584, GNorm = 0.3454, lr_0 = 3.2388e-04
Loss = 1.2637e-02, PNorm = 177.5716, GNorm = 0.2076, lr_0 = 3.2366e-04
Loss = 1.6485e-02, PNorm = 177.5808, GNorm = 0.1780, lr_0 = 3.2344e-04
Loss = 9.3827e-03, PNorm = 177.5926, GNorm = 0.4667, lr_0 = 3.2322e-04
Loss = 1.2181e-02, PNorm = 177.6067, GNorm = 0.3403, lr_0 = 3.2300e-04
Loss = 8.7591e-03, PNorm = 177.6187, GNorm = 0.2529, lr_0 = 3.2277e-04
Loss = 9.2795e-03, PNorm = 177.6365, GNorm = 0.1329, lr_0 = 3.2255e-04
Loss = 8.9316e-03, PNorm = 177.6512, GNorm = 0.5044, lr_0 = 3.2233e-04
Loss = 1.9942e-02, PNorm = 177.6650, GNorm = 0.1740, lr_0 = 3.2211e-04
Loss = 9.6382e-03, PNorm = 177.6792, GNorm = 0.2350, lr_0 = 3.2189e-04
Loss = 1.2459e-02, PNorm = 177.6938, GNorm = 0.1906, lr_0 = 3.2167e-04
Loss = 9.1172e-03, PNorm = 177.7080, GNorm = 0.1065, lr_0 = 3.2145e-04
Loss = 1.0242e-02, PNorm = 177.7234, GNorm = 0.1225, lr_0 = 3.2123e-04
Loss = 1.0216e-02, PNorm = 177.7377, GNorm = 0.4558, lr_0 = 3.2101e-04
Loss = 8.0479e-03, PNorm = 177.7512, GNorm = 0.2840, lr_0 = 3.2079e-04
Loss = 1.0475e-02, PNorm = 177.7639, GNorm = 0.3266, lr_0 = 3.2057e-04
Loss = 1.2176e-02, PNorm = 177.7785, GNorm = 0.1809, lr_0 = 3.2035e-04
Loss = 1.2520e-02, PNorm = 177.7951, GNorm = 0.3531, lr_0 = 3.2013e-04
Loss = 2.1551e-02, PNorm = 177.8060, GNorm = 0.5483, lr_0 = 3.1991e-04
Loss = 1.3276e-02, PNorm = 177.8187, GNorm = 0.3540, lr_0 = 3.1969e-04
Loss = 1.0069e-02, PNorm = 177.8336, GNorm = 0.1377, lr_0 = 3.1947e-04
Loss = 1.3481e-02, PNorm = 177.8481, GNorm = 0.2376, lr_0 = 3.1925e-04
Loss = 9.1242e-03, PNorm = 177.8647, GNorm = 0.1558, lr_0 = 3.1904e-04
Loss = 9.1510e-03, PNorm = 177.8803, GNorm = 0.1909, lr_0 = 3.1882e-04
Loss = 1.4006e-02, PNorm = 177.8955, GNorm = 0.1781, lr_0 = 3.1860e-04
Loss = 1.2770e-02, PNorm = 177.9089, GNorm = 0.2255, lr_0 = 3.1838e-04
Loss = 2.2822e-02, PNorm = 177.9213, GNorm = 0.2057, lr_0 = 3.1816e-04
Loss = 1.0905e-02, PNorm = 177.9329, GNorm = 0.2925, lr_0 = 3.1794e-04
Loss = 1.2858e-02, PNorm = 177.9482, GNorm = 0.3496, lr_0 = 3.1773e-04
Loss = 9.9578e-03, PNorm = 177.9614, GNorm = 0.3209, lr_0 = 3.1751e-04
Loss = 1.0240e-02, PNorm = 177.9769, GNorm = 0.2389, lr_0 = 3.1729e-04
Loss = 1.1714e-02, PNorm = 177.9924, GNorm = 0.1625, lr_0 = 3.1707e-04
Loss = 1.4928e-02, PNorm = 178.0054, GNorm = 0.3892, lr_0 = 3.1686e-04
Loss = 1.2076e-02, PNorm = 178.0212, GNorm = 0.2109, lr_0 = 3.1664e-04
Loss = 1.1890e-02, PNorm = 178.0369, GNorm = 0.3625, lr_0 = 3.1642e-04
Loss = 1.6375e-02, PNorm = 178.0508, GNorm = 0.2271, lr_0 = 3.1621e-04
Validation mae = 0.121816
Epoch 16
Loss = 1.3700e-02, PNorm = 178.0592, GNorm = 0.4464, lr_0 = 3.1599e-04
Loss = 1.0815e-02, PNorm = 178.0712, GNorm = 0.1812, lr_0 = 3.1577e-04
Loss = 9.0421e-03, PNorm = 178.0831, GNorm = 0.2617, lr_0 = 3.1556e-04
Loss = 1.3964e-02, PNorm = 178.0959, GNorm = 0.4754, lr_0 = 3.1534e-04
Loss = 8.8650e-03, PNorm = 178.1059, GNorm = 0.2289, lr_0 = 3.1512e-04
Loss = 1.0241e-02, PNorm = 178.1158, GNorm = 0.2306, lr_0 = 3.1491e-04
Loss = 8.5914e-03, PNorm = 178.1247, GNorm = 0.1712, lr_0 = 3.1469e-04
Loss = 8.5093e-03, PNorm = 178.1347, GNorm = 0.2085, lr_0 = 3.1448e-04
Loss = 8.6940e-03, PNorm = 178.1424, GNorm = 0.1606, lr_0 = 3.1426e-04
Loss = 1.7146e-02, PNorm = 178.1515, GNorm = 0.1512, lr_0 = 3.1405e-04
Loss = 1.4331e-02, PNorm = 178.1620, GNorm = 0.4609, lr_0 = 3.1383e-04
Loss = 9.8922e-03, PNorm = 178.1723, GNorm = 0.3406, lr_0 = 3.1362e-04
Loss = 1.2921e-02, PNorm = 178.1808, GNorm = 1.4302, lr_0 = 3.1340e-04
Loss = 1.0340e-02, PNorm = 178.1939, GNorm = 0.1644, lr_0 = 3.1319e-04
Loss = 1.0602e-02, PNorm = 178.2036, GNorm = 0.3265, lr_0 = 3.1297e-04
Loss = 8.4144e-03, PNorm = 178.2135, GNorm = 0.4727, lr_0 = 3.1276e-04
Loss = 1.0942e-02, PNorm = 178.2267, GNorm = 0.1485, lr_0 = 3.1254e-04
Loss = 8.8543e-03, PNorm = 178.2372, GNorm = 0.2926, lr_0 = 3.1233e-04
Loss = 9.2005e-03, PNorm = 178.2454, GNorm = 0.1671, lr_0 = 3.1212e-04
Loss = 8.3988e-03, PNorm = 178.2553, GNorm = 0.4515, lr_0 = 3.1190e-04
Loss = 8.6673e-03, PNorm = 178.2627, GNorm = 0.1487, lr_0 = 3.1169e-04
Loss = 1.2682e-02, PNorm = 178.2723, GNorm = 0.2077, lr_0 = 3.1147e-04
Loss = 1.0231e-02, PNorm = 178.2843, GNorm = 0.1031, lr_0 = 3.1126e-04
Loss = 1.0417e-02, PNorm = 178.2951, GNorm = 0.2776, lr_0 = 3.1105e-04
Loss = 9.5922e-03, PNorm = 178.3057, GNorm = 0.0963, lr_0 = 3.1083e-04
Loss = 1.0979e-02, PNorm = 178.3154, GNorm = 0.3701, lr_0 = 3.1062e-04
Loss = 9.4197e-03, PNorm = 178.3246, GNorm = 0.2873, lr_0 = 3.1041e-04
Loss = 8.9098e-03, PNorm = 178.3325, GNorm = 0.3598, lr_0 = 3.1020e-04
Loss = 8.6774e-03, PNorm = 178.3437, GNorm = 0.1599, lr_0 = 3.0998e-04
Loss = 1.1640e-02, PNorm = 178.3557, GNorm = 0.1922, lr_0 = 3.0977e-04
Loss = 1.3557e-02, PNorm = 178.3669, GNorm = 0.8227, lr_0 = 3.0956e-04
Loss = 9.2885e-03, PNorm = 178.3751, GNorm = 0.2303, lr_0 = 3.0935e-04
Loss = 9.4829e-03, PNorm = 178.3840, GNorm = 0.1303, lr_0 = 3.0914e-04
Loss = 9.9702e-03, PNorm = 178.3961, GNorm = 0.2921, lr_0 = 3.0892e-04
Loss = 1.5174e-02, PNorm = 178.4013, GNorm = 0.4318, lr_0 = 3.0871e-04
Loss = 8.9462e-03, PNorm = 178.4129, GNorm = 0.2687, lr_0 = 3.0850e-04
Loss = 1.1843e-02, PNorm = 178.4249, GNorm = 1.3521, lr_0 = 3.0829e-04
Loss = 9.6877e-03, PNorm = 178.4356, GNorm = 0.1324, lr_0 = 3.0808e-04
Loss = 8.7090e-03, PNorm = 178.4459, GNorm = 0.3011, lr_0 = 3.0787e-04
Loss = 1.3284e-02, PNorm = 178.4572, GNorm = 0.3185, lr_0 = 3.0766e-04
Loss = 1.1196e-02, PNorm = 178.4694, GNorm = 0.2884, lr_0 = 3.0745e-04
Loss = 9.1868e-03, PNorm = 178.4807, GNorm = 0.2086, lr_0 = 3.0723e-04
Loss = 1.0622e-02, PNorm = 178.4931, GNorm = 0.1353, lr_0 = 3.0702e-04
Loss = 1.1711e-02, PNorm = 178.5033, GNorm = 0.1421, lr_0 = 3.0681e-04
Loss = 1.6392e-02, PNorm = 178.5127, GNorm = 0.4194, lr_0 = 3.0660e-04
Loss = 8.4440e-03, PNorm = 178.5203, GNorm = 0.2643, lr_0 = 3.0639e-04
Loss = 1.4919e-02, PNorm = 178.5316, GNorm = 0.4982, lr_0 = 3.0618e-04
Loss = 1.0274e-02, PNorm = 178.5437, GNorm = 0.2867, lr_0 = 3.0597e-04
Loss = 1.0031e-02, PNorm = 178.5528, GNorm = 0.1373, lr_0 = 3.0576e-04
Loss = 1.0772e-02, PNorm = 178.5618, GNorm = 0.1260, lr_0 = 3.0555e-04
Loss = 7.9172e-03, PNorm = 178.5734, GNorm = 0.2406, lr_0 = 3.0535e-04
Loss = 7.7916e-03, PNorm = 178.5831, GNorm = 0.1693, lr_0 = 3.0514e-04
Loss = 8.9584e-03, PNorm = 178.5958, GNorm = 0.2079, lr_0 = 3.0493e-04
Loss = 8.3973e-03, PNorm = 178.6080, GNorm = 0.1722, lr_0 = 3.0472e-04
Loss = 7.0163e-03, PNorm = 178.6174, GNorm = 0.2563, lr_0 = 3.0451e-04
Loss = 1.4522e-02, PNorm = 178.6278, GNorm = 0.2873, lr_0 = 3.0430e-04
Loss = 9.3184e-03, PNorm = 178.6414, GNorm = 0.1005, lr_0 = 3.0409e-04
Loss = 8.5602e-03, PNorm = 178.6518, GNorm = 0.1834, lr_0 = 3.0388e-04
Loss = 9.1411e-03, PNorm = 178.6651, GNorm = 0.2065, lr_0 = 3.0368e-04
Loss = 9.0646e-03, PNorm = 178.6781, GNorm = 0.1548, lr_0 = 3.0347e-04
Loss = 7.9547e-03, PNorm = 178.6897, GNorm = 0.1909, lr_0 = 3.0326e-04
Loss = 9.6356e-03, PNorm = 178.6990, GNorm = 0.2160, lr_0 = 3.0305e-04
Loss = 2.4406e-02, PNorm = 178.7126, GNorm = 0.2930, lr_0 = 3.0284e-04
Loss = 1.4307e-02, PNorm = 178.7202, GNorm = 0.3457, lr_0 = 3.0264e-04
Loss = 9.0177e-03, PNorm = 178.7303, GNorm = 0.1761, lr_0 = 3.0243e-04
Loss = 8.6299e-03, PNorm = 178.7414, GNorm = 0.1773, lr_0 = 3.0222e-04
Loss = 1.1307e-02, PNorm = 178.7505, GNorm = 0.1819, lr_0 = 3.0202e-04
Loss = 9.4043e-03, PNorm = 178.7574, GNorm = 0.1558, lr_0 = 3.0181e-04
Loss = 8.6792e-03, PNorm = 178.7706, GNorm = 0.3265, lr_0 = 3.0160e-04
Loss = 9.8789e-03, PNorm = 178.7827, GNorm = 0.1842, lr_0 = 3.0140e-04
Loss = 9.3466e-03, PNorm = 178.7943, GNorm = 0.2473, lr_0 = 3.0119e-04
Loss = 1.2747e-02, PNorm = 178.8048, GNorm = 0.1768, lr_0 = 3.0098e-04
Loss = 9.5668e-03, PNorm = 178.8141, GNorm = 0.1494, lr_0 = 3.0078e-04
Loss = 1.1238e-02, PNorm = 178.8254, GNorm = 0.2790, lr_0 = 3.0057e-04
Loss = 1.4281e-02, PNorm = 178.8372, GNorm = 0.1740, lr_0 = 3.0036e-04
Loss = 1.0193e-02, PNorm = 178.8452, GNorm = 0.1770, lr_0 = 3.0016e-04
Loss = 1.1022e-02, PNorm = 178.8605, GNorm = 0.3116, lr_0 = 2.9995e-04
Loss = 1.1172e-02, PNorm = 178.8724, GNorm = 0.3485, lr_0 = 2.9975e-04
Loss = 8.6759e-03, PNorm = 178.8858, GNorm = 0.1432, lr_0 = 2.9954e-04
Loss = 8.6140e-03, PNorm = 178.8983, GNorm = 0.1624, lr_0 = 2.9934e-04
Loss = 9.0230e-03, PNorm = 178.9109, GNorm = 0.3298, lr_0 = 2.9913e-04
Loss = 1.7960e-02, PNorm = 178.9236, GNorm = 0.1864, lr_0 = 2.9893e-04
Loss = 9.1286e-03, PNorm = 178.9377, GNorm = 0.1565, lr_0 = 2.9872e-04
Loss = 9.1996e-03, PNorm = 178.9515, GNorm = 0.1651, lr_0 = 2.9852e-04
Loss = 8.3379e-03, PNorm = 178.9629, GNorm = 0.1565, lr_0 = 2.9831e-04
Loss = 1.1204e-02, PNorm = 178.9762, GNorm = 0.2459, lr_0 = 2.9811e-04
Loss = 1.3876e-02, PNorm = 178.9914, GNorm = 0.1378, lr_0 = 2.9790e-04
Loss = 8.6317e-03, PNorm = 179.0016, GNorm = 0.2212, lr_0 = 2.9770e-04
Loss = 7.1236e-03, PNorm = 179.0141, GNorm = 0.2155, lr_0 = 2.9750e-04
Loss = 9.5746e-03, PNorm = 179.0246, GNorm = 0.1418, lr_0 = 2.9729e-04
Loss = 8.1844e-03, PNorm = 179.0339, GNorm = 0.2083, lr_0 = 2.9709e-04
Loss = 9.5557e-03, PNorm = 179.0431, GNorm = 0.2682, lr_0 = 2.9689e-04
Loss = 8.3708e-03, PNorm = 179.0546, GNorm = 0.1883, lr_0 = 2.9668e-04
Loss = 1.2477e-02, PNorm = 179.0658, GNorm = 0.1719, lr_0 = 2.9648e-04
Loss = 1.4989e-02, PNorm = 179.0769, GNorm = 0.6236, lr_0 = 2.9628e-04
Loss = 8.1009e-03, PNorm = 179.0879, GNorm = 0.1083, lr_0 = 2.9607e-04
Loss = 7.4844e-03, PNorm = 179.0986, GNorm = 0.1591, lr_0 = 2.9587e-04
Loss = 1.0821e-02, PNorm = 179.1092, GNorm = 0.1485, lr_0 = 2.9567e-04
Loss = 1.3655e-02, PNorm = 179.1189, GNorm = 0.2673, lr_0 = 2.9546e-04
Loss = 8.2857e-03, PNorm = 179.1283, GNorm = 0.1154, lr_0 = 2.9526e-04
Loss = 1.1096e-02, PNorm = 179.1394, GNorm = 0.3285, lr_0 = 2.9506e-04
Loss = 7.8961e-03, PNorm = 179.1511, GNorm = 0.1199, lr_0 = 2.9486e-04
Loss = 1.6775e-02, PNorm = 179.1578, GNorm = 0.4338, lr_0 = 2.9466e-04
Loss = 1.6504e-02, PNorm = 179.1715, GNorm = 0.1993, lr_0 = 2.9445e-04
Loss = 1.1194e-02, PNorm = 179.1853, GNorm = 0.2341, lr_0 = 2.9425e-04
Loss = 7.9785e-03, PNorm = 179.2014, GNorm = 0.0822, lr_0 = 2.9405e-04
Loss = 8.9724e-03, PNorm = 179.2168, GNorm = 0.1613, lr_0 = 2.9385e-04
Loss = 1.2777e-02, PNorm = 179.2298, GNorm = 0.1435, lr_0 = 2.9365e-04
Loss = 1.4780e-02, PNorm = 179.2413, GNorm = 0.2453, lr_0 = 2.9345e-04
Loss = 9.1173e-03, PNorm = 179.2529, GNorm = 0.3568, lr_0 = 2.9325e-04
Loss = 8.3751e-03, PNorm = 179.2631, GNorm = 0.2184, lr_0 = 2.9305e-04
Loss = 9.1323e-03, PNorm = 179.2726, GNorm = 0.1939, lr_0 = 2.9284e-04
Loss = 9.4914e-03, PNorm = 179.2823, GNorm = 0.1687, lr_0 = 2.9264e-04
Loss = 1.1071e-02, PNorm = 179.2912, GNorm = 0.3267, lr_0 = 2.9244e-04
Loss = 1.3020e-02, PNorm = 179.2998, GNorm = 0.3252, lr_0 = 2.9224e-04
Loss = 1.0237e-02, PNorm = 179.3109, GNorm = 0.2214, lr_0 = 2.9204e-04
Loss = 9.4650e-03, PNorm = 179.3244, GNorm = 0.1187, lr_0 = 2.9184e-04
Loss = 1.3489e-02, PNorm = 179.3383, GNorm = 0.4202, lr_0 = 2.9164e-04
Loss = 1.1893e-02, PNorm = 179.3524, GNorm = 0.3611, lr_0 = 2.9144e-04
Loss = 9.9622e-03, PNorm = 179.3625, GNorm = 0.2834, lr_0 = 2.9124e-04
Validation mae = 0.121422
Epoch 17
Loss = 7.1250e-03, PNorm = 179.3725, GNorm = 0.1291, lr_0 = 2.9104e-04
Loss = 1.4353e-02, PNorm = 179.3812, GNorm = 0.3789, lr_0 = 2.9084e-04
Loss = 1.3756e-02, PNorm = 179.3916, GNorm = 0.1229, lr_0 = 2.9065e-04
Loss = 7.3924e-03, PNorm = 179.3999, GNorm = 0.2256, lr_0 = 2.9045e-04
Loss = 7.2301e-03, PNorm = 179.4073, GNorm = 0.1379, lr_0 = 2.9025e-04
Loss = 8.7733e-03, PNorm = 179.4188, GNorm = 0.1074, lr_0 = 2.9005e-04
Loss = 7.7600e-03, PNorm = 179.4302, GNorm = 0.3039, lr_0 = 2.8985e-04
Loss = 6.6275e-03, PNorm = 179.4376, GNorm = 0.1550, lr_0 = 2.8965e-04
Loss = 1.1378e-02, PNorm = 179.4473, GNorm = 0.1967, lr_0 = 2.8945e-04
Loss = 1.0372e-02, PNorm = 179.4544, GNorm = 0.1778, lr_0 = 2.8925e-04
Loss = 7.5744e-03, PNorm = 179.4621, GNorm = 0.3100, lr_0 = 2.8906e-04
Loss = 6.5069e-03, PNorm = 179.4702, GNorm = 0.2071, lr_0 = 2.8886e-04
Loss = 9.0142e-03, PNorm = 179.4796, GNorm = 0.2744, lr_0 = 2.8866e-04
Loss = 8.4794e-03, PNorm = 179.4905, GNorm = 0.1307, lr_0 = 2.8846e-04
Loss = 8.4064e-03, PNorm = 179.4998, GNorm = 0.2928, lr_0 = 2.8826e-04
Loss = 7.0143e-03, PNorm = 179.5066, GNorm = 0.3280, lr_0 = 2.8807e-04
Loss = 6.5333e-03, PNorm = 179.5141, GNorm = 0.3245, lr_0 = 2.8787e-04
Loss = 7.8191e-03, PNorm = 179.5190, GNorm = 0.2843, lr_0 = 2.8767e-04
Loss = 6.9088e-03, PNorm = 179.5273, GNorm = 0.2259, lr_0 = 2.8748e-04
Loss = 7.0178e-03, PNorm = 179.5349, GNorm = 0.1561, lr_0 = 2.8728e-04
Loss = 1.0989e-02, PNorm = 179.5438, GNorm = 0.8526, lr_0 = 2.8708e-04
Loss = 7.8924e-03, PNorm = 179.5529, GNorm = 0.2423, lr_0 = 2.8689e-04
Loss = 9.0454e-03, PNorm = 179.5621, GNorm = 0.1590, lr_0 = 2.8669e-04
Loss = 6.8713e-03, PNorm = 179.5698, GNorm = 0.3590, lr_0 = 2.8649e-04
Loss = 8.0333e-03, PNorm = 179.5796, GNorm = 0.2206, lr_0 = 2.8630e-04
Loss = 6.5494e-03, PNorm = 179.5899, GNorm = 0.2623, lr_0 = 2.8610e-04
Loss = 8.6262e-03, PNorm = 179.6006, GNorm = 0.1049, lr_0 = 2.8590e-04
Loss = 8.6673e-03, PNorm = 179.6116, GNorm = 0.3779, lr_0 = 2.8571e-04
Loss = 7.9659e-03, PNorm = 179.6203, GNorm = 0.5179, lr_0 = 2.8551e-04
Loss = 6.6720e-03, PNorm = 179.6279, GNorm = 0.2584, lr_0 = 2.8532e-04
Loss = 9.3906e-03, PNorm = 179.6383, GNorm = 0.3263, lr_0 = 2.8512e-04
Loss = 1.1525e-02, PNorm = 179.6473, GNorm = 0.2351, lr_0 = 2.8493e-04
Loss = 1.5667e-02, PNorm = 179.6576, GNorm = 0.2883, lr_0 = 2.8473e-04
Loss = 9.1008e-03, PNorm = 179.6660, GNorm = 0.1489, lr_0 = 2.8454e-04
Loss = 8.3138e-03, PNorm = 179.6737, GNorm = 0.3889, lr_0 = 2.8434e-04
Loss = 5.5403e-03, PNorm = 179.6820, GNorm = 0.1415, lr_0 = 2.8415e-04
Loss = 1.0453e-02, PNorm = 179.6906, GNorm = 0.3400, lr_0 = 2.8395e-04
Loss = 9.4843e-03, PNorm = 179.6970, GNorm = 0.1830, lr_0 = 2.8376e-04
Loss = 6.5240e-03, PNorm = 179.7049, GNorm = 0.2627, lr_0 = 2.8356e-04
Loss = 9.0854e-03, PNorm = 179.7123, GNorm = 0.2944, lr_0 = 2.8337e-04
Loss = 8.1585e-03, PNorm = 179.7222, GNorm = 0.3264, lr_0 = 2.8317e-04
Loss = 1.4997e-02, PNorm = 179.7311, GNorm = 0.1164, lr_0 = 2.8298e-04
Loss = 7.9582e-03, PNorm = 179.7403, GNorm = 0.5158, lr_0 = 2.8279e-04
Loss = 7.5914e-03, PNorm = 179.7480, GNorm = 0.2170, lr_0 = 2.8259e-04
Loss = 8.6275e-03, PNorm = 179.7556, GNorm = 0.2078, lr_0 = 2.8240e-04
Loss = 9.5626e-03, PNorm = 179.7629, GNorm = 0.5720, lr_0 = 2.8221e-04
Loss = 1.1724e-02, PNorm = 179.7679, GNorm = 0.1569, lr_0 = 2.8201e-04
Loss = 9.9906e-03, PNorm = 179.7792, GNorm = 0.4329, lr_0 = 2.8182e-04
Loss = 1.0303e-02, PNorm = 179.7903, GNorm = 0.2327, lr_0 = 2.8163e-04
Loss = 1.1175e-02, PNorm = 179.7979, GNorm = 0.3728, lr_0 = 2.8143e-04
Loss = 2.6879e-02, PNorm = 179.8112, GNorm = 0.3780, lr_0 = 2.8124e-04
Loss = 9.1239e-03, PNorm = 179.8207, GNorm = 0.2146, lr_0 = 2.8105e-04
Loss = 1.0256e-02, PNorm = 179.8321, GNorm = 0.1623, lr_0 = 2.8085e-04
Loss = 1.0209e-02, PNorm = 179.8441, GNorm = 0.3957, lr_0 = 2.8066e-04
Loss = 1.7399e-02, PNorm = 179.8500, GNorm = 0.5956, lr_0 = 2.8047e-04
Loss = 8.8627e-03, PNorm = 179.8593, GNorm = 0.1339, lr_0 = 2.8028e-04
Loss = 7.6813e-03, PNorm = 179.8679, GNorm = 0.1837, lr_0 = 2.8009e-04
Loss = 7.0215e-03, PNorm = 179.8762, GNorm = 0.2268, lr_0 = 2.7989e-04
Loss = 6.8346e-03, PNorm = 179.8847, GNorm = 0.1529, lr_0 = 2.7970e-04
Loss = 7.2099e-03, PNorm = 179.8921, GNorm = 0.3007, lr_0 = 2.7951e-04
Loss = 6.4144e-03, PNorm = 179.9019, GNorm = 0.2615, lr_0 = 2.7932e-04
Loss = 1.0307e-02, PNorm = 179.9087, GNorm = 0.1777, lr_0 = 2.7913e-04
Loss = 1.1155e-02, PNorm = 179.9134, GNorm = 0.6237, lr_0 = 2.7894e-04
Loss = 1.4609e-02, PNorm = 179.9229, GNorm = 1.5681, lr_0 = 2.7875e-04
Loss = 1.2787e-02, PNorm = 179.9304, GNorm = 0.5232, lr_0 = 2.7855e-04
Loss = 7.1156e-03, PNorm = 179.9399, GNorm = 0.1277, lr_0 = 2.7836e-04
Loss = 6.2646e-03, PNorm = 179.9485, GNorm = 0.1226, lr_0 = 2.7817e-04
Loss = 5.4781e-03, PNorm = 179.9575, GNorm = 0.1595, lr_0 = 2.7798e-04
Loss = 1.0053e-02, PNorm = 179.9659, GNorm = 0.4689, lr_0 = 2.7779e-04
Loss = 1.4611e-02, PNorm = 179.9775, GNorm = 0.2965, lr_0 = 2.7760e-04
Loss = 6.1106e-03, PNorm = 179.9860, GNorm = 0.2222, lr_0 = 2.7741e-04
Loss = 5.8925e-03, PNorm = 179.9926, GNorm = 0.1495, lr_0 = 2.7722e-04
Loss = 7.0639e-03, PNorm = 179.9989, GNorm = 0.1460, lr_0 = 2.7703e-04
Loss = 6.6284e-03, PNorm = 180.0059, GNorm = 0.2078, lr_0 = 2.7684e-04
Loss = 6.4922e-03, PNorm = 180.0152, GNorm = 0.1988, lr_0 = 2.7665e-04
Loss = 7.7532e-03, PNorm = 180.0239, GNorm = 0.4359, lr_0 = 2.7646e-04
Loss = 7.4559e-03, PNorm = 180.0338, GNorm = 0.2113, lr_0 = 2.7627e-04
Loss = 7.7515e-03, PNorm = 180.0456, GNorm = 0.1323, lr_0 = 2.7608e-04
Loss = 8.9221e-03, PNorm = 180.0564, GNorm = 0.1064, lr_0 = 2.7590e-04
Loss = 8.0143e-03, PNorm = 180.0650, GNorm = 0.1455, lr_0 = 2.7571e-04
Loss = 6.6000e-03, PNorm = 180.0736, GNorm = 0.2124, lr_0 = 2.7552e-04
Loss = 1.1871e-02, PNorm = 180.0823, GNorm = 0.3806, lr_0 = 2.7533e-04
Loss = 9.0025e-03, PNorm = 180.0925, GNorm = 0.3446, lr_0 = 2.7514e-04
Loss = 7.5133e-03, PNorm = 180.0993, GNorm = 0.3161, lr_0 = 2.7495e-04
Loss = 7.2430e-03, PNorm = 180.1058, GNorm = 0.1074, lr_0 = 2.7476e-04
Loss = 8.7446e-03, PNorm = 180.1150, GNorm = 0.1753, lr_0 = 2.7457e-04
Loss = 7.3597e-03, PNorm = 180.1242, GNorm = 0.3039, lr_0 = 2.7439e-04
Loss = 9.3258e-03, PNorm = 180.1332, GNorm = 0.1329, lr_0 = 2.7420e-04
Loss = 9.1256e-03, PNorm = 180.1386, GNorm = 0.2602, lr_0 = 2.7401e-04
Loss = 7.7054e-03, PNorm = 180.1468, GNorm = 0.0987, lr_0 = 2.7382e-04
Loss = 8.9514e-03, PNorm = 180.1573, GNorm = 0.1410, lr_0 = 2.7364e-04
Loss = 1.1918e-02, PNorm = 180.1680, GNorm = 0.2044, lr_0 = 2.7345e-04
Loss = 7.3341e-03, PNorm = 180.1768, GNorm = 0.1171, lr_0 = 2.7326e-04
Loss = 5.0803e-03, PNorm = 180.1846, GNorm = 0.1649, lr_0 = 2.7307e-04
Loss = 7.1884e-03, PNorm = 180.1923, GNorm = 0.0975, lr_0 = 2.7289e-04
Loss = 5.9109e-03, PNorm = 180.2021, GNorm = 0.1564, lr_0 = 2.7270e-04
Loss = 9.7131e-03, PNorm = 180.2120, GNorm = 0.1700, lr_0 = 2.7251e-04
Loss = 1.2019e-02, PNorm = 180.2244, GNorm = 0.2677, lr_0 = 2.7233e-04
Loss = 6.7599e-03, PNorm = 180.2341, GNorm = 0.1226, lr_0 = 2.7214e-04
Loss = 1.0649e-02, PNorm = 180.2476, GNorm = 0.2819, lr_0 = 2.7195e-04
Loss = 1.2447e-02, PNorm = 180.2583, GNorm = 0.2623, lr_0 = 2.7177e-04
Loss = 1.0926e-02, PNorm = 180.2651, GNorm = 0.8630, lr_0 = 2.7158e-04
Loss = 1.0251e-02, PNorm = 180.2728, GNorm = 0.2223, lr_0 = 2.7139e-04
Loss = 9.5507e-03, PNorm = 180.2819, GNorm = 0.1327, lr_0 = 2.7121e-04
Loss = 1.0063e-02, PNorm = 180.2911, GNorm = 0.1103, lr_0 = 2.7102e-04
Loss = 7.3862e-03, PNorm = 180.2997, GNorm = 0.2918, lr_0 = 2.7084e-04
Loss = 1.7401e-02, PNorm = 180.3086, GNorm = 0.1708, lr_0 = 2.7065e-04
Loss = 1.0117e-02, PNorm = 180.3209, GNorm = 0.3275, lr_0 = 2.7047e-04
Loss = 1.7375e-02, PNorm = 180.3304, GNorm = 0.2185, lr_0 = 2.7028e-04
Loss = 8.5880e-03, PNorm = 180.3413, GNorm = 0.3206, lr_0 = 2.7010e-04
Loss = 7.0856e-03, PNorm = 180.3520, GNorm = 0.3041, lr_0 = 2.6991e-04
Loss = 9.0838e-03, PNorm = 180.3599, GNorm = 0.2859, lr_0 = 2.6973e-04
Loss = 9.5647e-03, PNorm = 180.3697, GNorm = 0.1269, lr_0 = 2.6954e-04
Loss = 9.3755e-03, PNorm = 180.3791, GNorm = 0.1864, lr_0 = 2.6936e-04
Loss = 7.4834e-03, PNorm = 180.3897, GNorm = 0.3103, lr_0 = 2.6917e-04
Loss = 6.7634e-03, PNorm = 180.3990, GNorm = 0.1711, lr_0 = 2.6899e-04
Loss = 7.7676e-03, PNorm = 180.4097, GNorm = 0.1596, lr_0 = 2.6880e-04
Loss = 1.0368e-02, PNorm = 180.4198, GNorm = 0.2622, lr_0 = 2.6862e-04
Loss = 7.6791e-03, PNorm = 180.4315, GNorm = 0.1784, lr_0 = 2.6844e-04
Loss = 8.1482e-03, PNorm = 180.4410, GNorm = 0.1878, lr_0 = 2.6825e-04
Validation mae = 0.121578
Epoch 18
Loss = 7.4899e-03, PNorm = 180.4471, GNorm = 0.3042, lr_0 = 2.6807e-04
Loss = 7.1959e-03, PNorm = 180.4540, GNorm = 0.1498, lr_0 = 2.6788e-04
Loss = 6.3044e-03, PNorm = 180.4615, GNorm = 0.1105, lr_0 = 2.6770e-04
Loss = 6.6613e-03, PNorm = 180.4662, GNorm = 0.2784, lr_0 = 2.6752e-04
Loss = 5.5617e-03, PNorm = 180.4707, GNorm = 0.2098, lr_0 = 2.6733e-04
Loss = 7.8846e-03, PNorm = 180.4765, GNorm = 0.1214, lr_0 = 2.6715e-04
Loss = 7.3730e-03, PNorm = 180.4840, GNorm = 0.1494, lr_0 = 2.6697e-04
Loss = 8.2151e-03, PNorm = 180.4902, GNorm = 0.1950, lr_0 = 2.6678e-04
Loss = 9.7020e-03, PNorm = 180.4963, GNorm = 0.2357, lr_0 = 2.6660e-04
Loss = 6.6304e-03, PNorm = 180.5020, GNorm = 0.4820, lr_0 = 2.6642e-04
Loss = 6.5720e-03, PNorm = 180.5085, GNorm = 0.1330, lr_0 = 2.6624e-04
Loss = 5.5703e-03, PNorm = 180.5154, GNorm = 0.1500, lr_0 = 2.6605e-04
Loss = 5.3763e-03, PNorm = 180.5232, GNorm = 0.1157, lr_0 = 2.6587e-04
Loss = 6.0592e-03, PNorm = 180.5299, GNorm = 0.1496, lr_0 = 2.6569e-04
Loss = 5.9134e-03, PNorm = 180.5355, GNorm = 0.3065, lr_0 = 2.6551e-04
Loss = 1.0087e-02, PNorm = 180.5418, GNorm = 0.3183, lr_0 = 2.6533e-04
Loss = 7.7403e-03, PNorm = 180.5497, GNorm = 0.1543, lr_0 = 2.6514e-04
Loss = 6.2601e-03, PNorm = 180.5578, GNorm = 0.2704, lr_0 = 2.6496e-04
Loss = 9.6689e-03, PNorm = 180.5655, GNorm = 0.2694, lr_0 = 2.6478e-04
Loss = 8.5095e-03, PNorm = 180.5716, GNorm = 0.2491, lr_0 = 2.6460e-04
Loss = 1.5890e-02, PNorm = 180.5757, GNorm = 0.2078, lr_0 = 2.6442e-04
Loss = 6.7017e-03, PNorm = 180.5835, GNorm = 0.2061, lr_0 = 2.6424e-04
Loss = 7.0197e-03, PNorm = 180.5914, GNorm = 0.2471, lr_0 = 2.6406e-04
Loss = 5.8264e-03, PNorm = 180.6009, GNorm = 0.1366, lr_0 = 2.6388e-04
Loss = 8.0315e-03, PNorm = 180.6086, GNorm = 0.1585, lr_0 = 2.6369e-04
Loss = 7.3640e-03, PNorm = 180.6135, GNorm = 0.1052, lr_0 = 2.6351e-04
Loss = 6.4246e-03, PNorm = 180.6217, GNorm = 0.1811, lr_0 = 2.6333e-04
Loss = 7.8402e-03, PNorm = 180.6293, GNorm = 0.1185, lr_0 = 2.6315e-04
Loss = 5.7702e-03, PNorm = 180.6350, GNorm = 0.1081, lr_0 = 2.6297e-04
Loss = 5.1519e-03, PNorm = 180.6411, GNorm = 0.1585, lr_0 = 2.6279e-04
Loss = 6.6431e-03, PNorm = 180.6466, GNorm = 0.2136, lr_0 = 2.6261e-04
Loss = 1.0566e-02, PNorm = 180.6516, GNorm = 0.3113, lr_0 = 2.6243e-04
Loss = 8.0759e-03, PNorm = 180.6571, GNorm = 0.1326, lr_0 = 2.6225e-04
Loss = 6.1991e-03, PNorm = 180.6649, GNorm = 0.1692, lr_0 = 2.6207e-04
Loss = 7.3130e-03, PNorm = 180.6735, GNorm = 0.1837, lr_0 = 2.6189e-04
Loss = 7.5373e-03, PNorm = 180.6818, GNorm = 0.4086, lr_0 = 2.6171e-04
Loss = 6.8034e-03, PNorm = 180.6876, GNorm = 0.1676, lr_0 = 2.6153e-04
Loss = 7.7737e-03, PNorm = 180.6955, GNorm = 0.2064, lr_0 = 2.6136e-04
Loss = 6.4733e-03, PNorm = 180.7027, GNorm = 0.2469, lr_0 = 2.6118e-04
Loss = 7.0364e-03, PNorm = 180.7118, GNorm = 0.0923, lr_0 = 2.6100e-04
Loss = 6.8777e-03, PNorm = 180.7204, GNorm = 0.4314, lr_0 = 2.6082e-04
Loss = 7.6654e-03, PNorm = 180.7310, GNorm = 0.2461, lr_0 = 2.6064e-04
Loss = 8.1896e-03, PNorm = 180.7402, GNorm = 0.1065, lr_0 = 2.6046e-04
Loss = 8.8333e-03, PNorm = 180.7481, GNorm = 0.2033, lr_0 = 2.6028e-04
Loss = 1.3712e-02, PNorm = 180.7554, GNorm = 0.1180, lr_0 = 2.6011e-04
Loss = 1.0852e-02, PNorm = 180.7641, GNorm = 0.2166, lr_0 = 2.5993e-04
Loss = 1.0264e-02, PNorm = 180.7730, GNorm = 0.1454, lr_0 = 2.5975e-04
Loss = 6.8975e-03, PNorm = 180.7801, GNorm = 0.3339, lr_0 = 2.5957e-04
Loss = 7.1069e-03, PNorm = 180.7869, GNorm = 0.2195, lr_0 = 2.5939e-04
Loss = 9.3500e-03, PNorm = 180.7950, GNorm = 0.1807, lr_0 = 2.5922e-04
Loss = 8.4412e-03, PNorm = 180.8040, GNorm = 0.2138, lr_0 = 2.5904e-04
Loss = 5.5677e-03, PNorm = 180.8147, GNorm = 0.3417, lr_0 = 2.5886e-04
Loss = 7.7687e-03, PNorm = 180.8242, GNorm = 0.2168, lr_0 = 2.5868e-04
Loss = 8.8069e-03, PNorm = 180.8312, GNorm = 0.2129, lr_0 = 2.5851e-04
Loss = 2.2865e-02, PNorm = 180.8402, GNorm = 0.3918, lr_0 = 2.5833e-04
Loss = 1.0066e-02, PNorm = 180.8499, GNorm = 0.2109, lr_0 = 2.5815e-04
Loss = 1.5266e-02, PNorm = 180.8576, GNorm = 0.3986, lr_0 = 2.5797e-04
Loss = 9.4864e-03, PNorm = 180.8641, GNorm = 0.1982, lr_0 = 2.5780e-04
Loss = 7.3366e-03, PNorm = 180.8735, GNorm = 0.1368, lr_0 = 2.5762e-04
Loss = 7.3019e-03, PNorm = 180.8829, GNorm = 0.2305, lr_0 = 2.5745e-04
Loss = 6.8423e-03, PNorm = 180.8918, GNorm = 0.1759, lr_0 = 2.5727e-04
Loss = 1.0324e-02, PNorm = 180.8978, GNorm = 0.4795, lr_0 = 2.5709e-04
Loss = 1.4636e-02, PNorm = 180.9033, GNorm = 0.3055, lr_0 = 2.5692e-04
Loss = 6.0361e-03, PNorm = 180.9135, GNorm = 0.2670, lr_0 = 2.5674e-04
Loss = 1.2309e-02, PNorm = 180.9232, GNorm = 0.1512, lr_0 = 2.5656e-04
Loss = 7.0800e-03, PNorm = 180.9307, GNorm = 0.3833, lr_0 = 2.5639e-04
Loss = 1.0107e-02, PNorm = 180.9390, GNorm = 0.7707, lr_0 = 2.5621e-04
Loss = 6.4963e-03, PNorm = 180.9487, GNorm = 0.1379, lr_0 = 2.5604e-04
Loss = 1.0196e-02, PNorm = 180.9546, GNorm = 0.8453, lr_0 = 2.5586e-04
Loss = 1.0677e-02, PNorm = 180.9631, GNorm = 0.3167, lr_0 = 2.5569e-04
Loss = 8.9304e-03, PNorm = 180.9730, GNorm = 0.4894, lr_0 = 2.5551e-04
Loss = 1.1406e-02, PNorm = 180.9808, GNorm = 0.2562, lr_0 = 2.5534e-04
Loss = 1.1728e-02, PNorm = 180.9878, GNorm = 0.2904, lr_0 = 2.5516e-04
Loss = 5.4448e-03, PNorm = 180.9980, GNorm = 0.1799, lr_0 = 2.5499e-04
Loss = 9.5462e-03, PNorm = 181.0062, GNorm = 0.2339, lr_0 = 2.5481e-04
Loss = 5.9838e-03, PNorm = 181.0152, GNorm = 0.2751, lr_0 = 2.5464e-04
Loss = 1.2362e-02, PNorm = 181.0253, GNorm = 0.1000, lr_0 = 2.5446e-04
Loss = 6.8591e-03, PNorm = 181.0338, GNorm = 0.1676, lr_0 = 2.5429e-04
Loss = 1.3108e-02, PNorm = 181.0405, GNorm = 0.1313, lr_0 = 2.5411e-04
Loss = 8.2853e-03, PNorm = 181.0509, GNorm = 0.1866, lr_0 = 2.5394e-04
Loss = 6.8301e-03, PNorm = 181.0600, GNorm = 0.1819, lr_0 = 2.5377e-04
Loss = 8.5029e-03, PNorm = 181.0690, GNorm = 0.2153, lr_0 = 2.5359e-04
Loss = 5.7893e-03, PNorm = 181.0760, GNorm = 0.1703, lr_0 = 2.5342e-04
Loss = 7.0261e-03, PNorm = 181.0812, GNorm = 0.2553, lr_0 = 2.5325e-04
Loss = 5.8841e-03, PNorm = 181.0899, GNorm = 0.1236, lr_0 = 2.5307e-04
Loss = 6.0449e-03, PNorm = 181.0976, GNorm = 0.1228, lr_0 = 2.5290e-04
Loss = 6.9555e-03, PNorm = 181.1073, GNorm = 0.1333, lr_0 = 2.5273e-04
Loss = 7.7679e-03, PNorm = 181.1158, GNorm = 0.1538, lr_0 = 2.5255e-04
Loss = 1.4874e-02, PNorm = 181.1219, GNorm = 0.1776, lr_0 = 2.5238e-04
Loss = 7.6072e-03, PNorm = 181.1310, GNorm = 0.1214, lr_0 = 2.5221e-04
Loss = 5.4705e-03, PNorm = 181.1385, GNorm = 0.2368, lr_0 = 2.5203e-04
Loss = 1.4594e-02, PNorm = 181.1435, GNorm = 0.1174, lr_0 = 2.5186e-04
Loss = 8.0752e-03, PNorm = 181.1587, GNorm = 0.3706, lr_0 = 2.5169e-04
Loss = 1.0790e-02, PNorm = 181.1656, GNorm = 0.3531, lr_0 = 2.5152e-04
Loss = 7.3991e-03, PNorm = 181.1713, GNorm = 0.4055, lr_0 = 2.5134e-04
Loss = 6.6110e-03, PNorm = 181.1802, GNorm = 0.2313, lr_0 = 2.5117e-04
Loss = 9.0643e-03, PNorm = 181.1872, GNorm = 0.0996, lr_0 = 2.5100e-04
Loss = 1.2208e-02, PNorm = 181.1944, GNorm = 0.2055, lr_0 = 2.5083e-04
Loss = 5.8453e-03, PNorm = 181.2035, GNorm = 0.2514, lr_0 = 2.5066e-04
Loss = 5.7124e-03, PNorm = 181.2134, GNorm = 0.1564, lr_0 = 2.5048e-04
Loss = 1.0006e-02, PNorm = 181.2200, GNorm = 0.1494, lr_0 = 2.5031e-04
Loss = 5.3481e-03, PNorm = 181.2267, GNorm = 0.1552, lr_0 = 2.5014e-04
Loss = 9.6498e-03, PNorm = 181.2326, GNorm = 0.3029, lr_0 = 2.4997e-04
Loss = 5.6064e-03, PNorm = 181.2412, GNorm = 0.1489, lr_0 = 2.4980e-04
Loss = 1.0374e-02, PNorm = 181.2505, GNorm = 0.3218, lr_0 = 2.4963e-04
Loss = 7.2250e-03, PNorm = 181.2587, GNorm = 0.3889, lr_0 = 2.4946e-04
Loss = 8.5131e-03, PNorm = 181.2659, GNorm = 0.1519, lr_0 = 2.4929e-04
Loss = 5.7719e-03, PNorm = 181.2754, GNorm = 0.1663, lr_0 = 2.4911e-04
Loss = 6.5370e-03, PNorm = 181.2849, GNorm = 0.2921, lr_0 = 2.4894e-04
Loss = 6.8491e-03, PNorm = 181.2924, GNorm = 0.1223, lr_0 = 2.4877e-04
Loss = 9.9127e-03, PNorm = 181.2992, GNorm = 0.1899, lr_0 = 2.4860e-04
Loss = 6.3009e-03, PNorm = 181.3085, GNorm = 0.2005, lr_0 = 2.4843e-04
Loss = 7.1193e-03, PNorm = 181.3169, GNorm = 0.3347, lr_0 = 2.4826e-04
Loss = 6.0567e-03, PNorm = 181.3249, GNorm = 0.1798, lr_0 = 2.4809e-04
Loss = 5.4832e-03, PNorm = 181.3303, GNorm = 0.1108, lr_0 = 2.4792e-04
Loss = 1.0486e-02, PNorm = 181.3387, GNorm = 0.2733, lr_0 = 2.4775e-04
Loss = 6.7949e-03, PNorm = 181.3489, GNorm = 0.0948, lr_0 = 2.4758e-04
Loss = 7.3372e-03, PNorm = 181.3572, GNorm = 0.3967, lr_0 = 2.4741e-04
Loss = 6.8771e-03, PNorm = 181.3671, GNorm = 0.1803, lr_0 = 2.4724e-04
Loss = 8.0617e-03, PNorm = 181.3751, GNorm = 0.2134, lr_0 = 2.4707e-04
Validation mae = 0.121424
Epoch 19
Loss = 6.0641e-03, PNorm = 181.3824, GNorm = 0.1271, lr_0 = 2.4690e-04
Loss = 4.4812e-03, PNorm = 181.3877, GNorm = 0.1217, lr_0 = 2.4674e-04
Loss = 8.7050e-03, PNorm = 181.3947, GNorm = 0.2021, lr_0 = 2.4657e-04
Loss = 7.6222e-03, PNorm = 181.4007, GNorm = 0.3247, lr_0 = 2.4640e-04
Loss = 5.7813e-03, PNorm = 181.4048, GNorm = 0.4145, lr_0 = 2.4623e-04
Loss = 5.1395e-03, PNorm = 181.4078, GNorm = 0.1730, lr_0 = 2.4606e-04
Loss = 9.2739e-03, PNorm = 181.4120, GNorm = 0.1937, lr_0 = 2.4589e-04
Loss = 5.7524e-03, PNorm = 181.4188, GNorm = 0.2407, lr_0 = 2.4572e-04
Loss = 7.0543e-03, PNorm = 181.4270, GNorm = 0.1872, lr_0 = 2.4556e-04
Loss = 6.4267e-03, PNorm = 181.4324, GNorm = 0.1742, lr_0 = 2.4539e-04
Loss = 5.3551e-03, PNorm = 181.4376, GNorm = 0.1447, lr_0 = 2.4522e-04
Loss = 8.8484e-03, PNorm = 181.4419, GNorm = 0.4856, lr_0 = 2.4505e-04
Loss = 4.8992e-03, PNorm = 181.4486, GNorm = 0.1447, lr_0 = 2.4488e-04
Loss = 5.1601e-03, PNorm = 181.4545, GNorm = 0.2009, lr_0 = 2.4472e-04
Loss = 7.8267e-03, PNorm = 181.4634, GNorm = 0.2950, lr_0 = 2.4455e-04
Loss = 5.4941e-03, PNorm = 181.4721, GNorm = 0.1543, lr_0 = 2.4438e-04
Loss = 5.2414e-03, PNorm = 181.4772, GNorm = 0.0966, lr_0 = 2.4421e-04
Loss = 5.2711e-03, PNorm = 181.4827, GNorm = 0.2613, lr_0 = 2.4405e-04
Loss = 5.7444e-03, PNorm = 181.4864, GNorm = 0.1621, lr_0 = 2.4388e-04
Loss = 4.5053e-03, PNorm = 181.4921, GNorm = 0.1611, lr_0 = 2.4371e-04
Loss = 7.5278e-03, PNorm = 181.4974, GNorm = 0.1911, lr_0 = 2.4354e-04
Loss = 7.7114e-03, PNorm = 181.5032, GNorm = 0.1878, lr_0 = 2.4338e-04
Loss = 5.6214e-03, PNorm = 181.5090, GNorm = 0.0921, lr_0 = 2.4321e-04
Loss = 7.0401e-03, PNorm = 181.5142, GNorm = 0.2433, lr_0 = 2.4304e-04
Loss = 5.7678e-03, PNorm = 181.5200, GNorm = 0.2055, lr_0 = 2.4288e-04
Loss = 6.8050e-03, PNorm = 181.5266, GNorm = 0.2296, lr_0 = 2.4271e-04
Loss = 8.1433e-03, PNorm = 181.5307, GNorm = 0.0989, lr_0 = 2.4254e-04
Loss = 5.2358e-03, PNorm = 181.5357, GNorm = 0.3993, lr_0 = 2.4238e-04
Loss = 5.6335e-03, PNorm = 181.5393, GNorm = 0.1056, lr_0 = 2.4221e-04
Loss = 7.0258e-03, PNorm = 181.5448, GNorm = 0.0684, lr_0 = 2.4205e-04
Loss = 4.5687e-03, PNorm = 181.5487, GNorm = 0.1125, lr_0 = 2.4188e-04
Loss = 1.0699e-02, PNorm = 181.5533, GNorm = 0.5459, lr_0 = 2.4171e-04
Loss = 4.9022e-03, PNorm = 181.5613, GNorm = 0.1389, lr_0 = 2.4155e-04
Loss = 6.1402e-03, PNorm = 181.5698, GNorm = 0.2579, lr_0 = 2.4138e-04
Loss = 7.2229e-03, PNorm = 181.5758, GNorm = 0.5194, lr_0 = 2.4122e-04
Loss = 7.1570e-03, PNorm = 181.5832, GNorm = 0.2304, lr_0 = 2.4105e-04
Loss = 1.2586e-02, PNorm = 181.5908, GNorm = 0.2043, lr_0 = 2.4089e-04
Loss = 5.3222e-03, PNorm = 181.5967, GNorm = 0.2556, lr_0 = 2.4072e-04
Loss = 5.5733e-03, PNorm = 181.6021, GNorm = 0.2882, lr_0 = 2.4056e-04
Loss = 4.7448e-03, PNorm = 181.6067, GNorm = 0.1201, lr_0 = 2.4039e-04
Loss = 1.0761e-02, PNorm = 181.6145, GNorm = 0.2415, lr_0 = 2.4023e-04
Loss = 4.8588e-03, PNorm = 181.6198, GNorm = 0.1233, lr_0 = 2.4006e-04
Loss = 9.8712e-03, PNorm = 181.6240, GNorm = 0.5582, lr_0 = 2.3990e-04
Loss = 8.5447e-03, PNorm = 181.6296, GNorm = 0.4323, lr_0 = 2.3974e-04
Loss = 5.2880e-03, PNorm = 181.6355, GNorm = 0.2101, lr_0 = 2.3957e-04
Loss = 8.5800e-03, PNorm = 181.6414, GNorm = 0.2937, lr_0 = 2.3941e-04
Loss = 6.2923e-03, PNorm = 181.6477, GNorm = 0.1627, lr_0 = 2.3924e-04
Loss = 4.6091e-03, PNorm = 181.6551, GNorm = 0.1003, lr_0 = 2.3908e-04
Loss = 5.2839e-03, PNorm = 181.6620, GNorm = 0.1224, lr_0 = 2.3892e-04
Loss = 4.4362e-03, PNorm = 181.6697, GNorm = 0.1643, lr_0 = 2.3875e-04
Loss = 7.2096e-03, PNorm = 181.6761, GNorm = 0.2040, lr_0 = 2.3859e-04
Loss = 8.1523e-03, PNorm = 181.6802, GNorm = 0.3231, lr_0 = 2.3842e-04
Loss = 5.5877e-03, PNorm = 181.6885, GNorm = 0.1772, lr_0 = 2.3826e-04
Loss = 6.4089e-03, PNorm = 181.6933, GNorm = 0.1536, lr_0 = 2.3810e-04
Loss = 9.7571e-03, PNorm = 181.6988, GNorm = 0.6138, lr_0 = 2.3794e-04
Loss = 6.8151e-03, PNorm = 181.7041, GNorm = 0.2281, lr_0 = 2.3777e-04
Loss = 4.9364e-03, PNorm = 181.7124, GNorm = 0.1266, lr_0 = 2.3761e-04
Loss = 6.7245e-03, PNorm = 181.7188, GNorm = 0.2457, lr_0 = 2.3745e-04
Loss = 5.8510e-03, PNorm = 181.7266, GNorm = 0.1207, lr_0 = 2.3728e-04
Loss = 1.0731e-02, PNorm = 181.7322, GNorm = 0.1641, lr_0 = 2.3712e-04
Loss = 5.8874e-03, PNorm = 181.7380, GNorm = 0.1216, lr_0 = 2.3696e-04
Loss = 7.0007e-03, PNorm = 181.7437, GNorm = 0.0674, lr_0 = 2.3680e-04
Loss = 9.2478e-03, PNorm = 181.7518, GNorm = 0.2895, lr_0 = 2.3663e-04
Loss = 4.2844e-03, PNorm = 181.7576, GNorm = 0.1368, lr_0 = 2.3647e-04
Loss = 7.7936e-03, PNorm = 181.7646, GNorm = 0.2322, lr_0 = 2.3631e-04
Loss = 7.0936e-03, PNorm = 181.7705, GNorm = 0.1474, lr_0 = 2.3615e-04
Loss = 4.7497e-03, PNorm = 181.7787, GNorm = 0.1682, lr_0 = 2.3599e-04
Loss = 4.6104e-03, PNorm = 181.7854, GNorm = 0.2023, lr_0 = 2.3582e-04
Loss = 1.0806e-02, PNorm = 181.7929, GNorm = 0.1838, lr_0 = 2.3566e-04
Loss = 6.2246e-03, PNorm = 181.8010, GNorm = 0.0942, lr_0 = 2.3550e-04
Loss = 5.9858e-03, PNorm = 181.8063, GNorm = 0.1773, lr_0 = 2.3534e-04
Loss = 5.6250e-03, PNorm = 181.8124, GNorm = 0.2530, lr_0 = 2.3518e-04
Loss = 5.3599e-03, PNorm = 181.8197, GNorm = 0.0859, lr_0 = 2.3502e-04
Loss = 6.2146e-03, PNorm = 181.8249, GNorm = 0.2672, lr_0 = 2.3486e-04
Loss = 1.1735e-02, PNorm = 181.8298, GNorm = 0.2345, lr_0 = 2.3470e-04
Loss = 6.5625e-03, PNorm = 181.8337, GNorm = 0.2208, lr_0 = 2.3454e-04
Loss = 7.1603e-03, PNorm = 181.8396, GNorm = 0.2377, lr_0 = 2.3437e-04
Loss = 5.4663e-03, PNorm = 181.8463, GNorm = 0.1039, lr_0 = 2.3421e-04
Loss = 5.6477e-03, PNorm = 181.8549, GNorm = 0.0947, lr_0 = 2.3405e-04
Loss = 1.4177e-02, PNorm = 181.8619, GNorm = 0.5361, lr_0 = 2.3389e-04
Loss = 8.6451e-03, PNorm = 181.8688, GNorm = 0.3382, lr_0 = 2.3373e-04
Loss = 6.9606e-03, PNorm = 181.8771, GNorm = 0.0970, lr_0 = 2.3357e-04
Loss = 5.2654e-03, PNorm = 181.8839, GNorm = 0.2150, lr_0 = 2.3341e-04
Loss = 4.4029e-03, PNorm = 181.8909, GNorm = 0.0809, lr_0 = 2.3325e-04
Loss = 9.9293e-03, PNorm = 181.8951, GNorm = 0.1909, lr_0 = 2.3309e-04
Loss = 6.6972e-03, PNorm = 181.9030, GNorm = 0.1910, lr_0 = 2.3293e-04
Loss = 4.3775e-03, PNorm = 181.9093, GNorm = 0.1386, lr_0 = 2.3277e-04
Loss = 4.4420e-03, PNorm = 181.9175, GNorm = 0.2970, lr_0 = 2.3261e-04
Loss = 5.8552e-03, PNorm = 181.9254, GNorm = 0.1752, lr_0 = 2.3246e-04
Loss = 1.3746e-02, PNorm = 181.9333, GNorm = 0.0924, lr_0 = 2.3230e-04
Loss = 5.4080e-03, PNorm = 181.9383, GNorm = 0.1764, lr_0 = 2.3214e-04
Loss = 4.2804e-03, PNorm = 181.9461, GNorm = 0.1813, lr_0 = 2.3198e-04
Loss = 9.2769e-03, PNorm = 181.9524, GNorm = 0.0992, lr_0 = 2.3182e-04
Loss = 7.6334e-03, PNorm = 181.9582, GNorm = 0.2234, lr_0 = 2.3166e-04
Loss = 1.5265e-02, PNorm = 181.9665, GNorm = 0.3334, lr_0 = 2.3150e-04
Loss = 6.8362e-03, PNorm = 181.9738, GNorm = 0.2674, lr_0 = 2.3134e-04
Loss = 6.2407e-03, PNorm = 181.9784, GNorm = 0.2485, lr_0 = 2.3118e-04
Loss = 7.9990e-03, PNorm = 181.9866, GNorm = 0.2963, lr_0 = 2.3103e-04
Loss = 4.3727e-03, PNorm = 181.9929, GNorm = 0.1162, lr_0 = 2.3087e-04
Loss = 6.9556e-03, PNorm = 181.9979, GNorm = 0.4403, lr_0 = 2.3071e-04
Loss = 6.3055e-03, PNorm = 182.0038, GNorm = 0.1743, lr_0 = 2.3055e-04
Loss = 6.8299e-03, PNorm = 182.0106, GNorm = 0.1157, lr_0 = 2.3039e-04
Loss = 1.1240e-02, PNorm = 182.0187, GNorm = 0.3738, lr_0 = 2.3024e-04
Loss = 6.7151e-03, PNorm = 182.0254, GNorm = 0.0862, lr_0 = 2.3008e-04
Loss = 7.8774e-03, PNorm = 182.0327, GNorm = 0.1155, lr_0 = 2.2992e-04
Loss = 4.1292e-03, PNorm = 182.0395, GNorm = 0.1176, lr_0 = 2.2976e-04
Loss = 5.7176e-03, PNorm = 182.0457, GNorm = 0.1147, lr_0 = 2.2961e-04
Loss = 5.3889e-03, PNorm = 182.0499, GNorm = 0.2262, lr_0 = 2.2945e-04
Loss = 6.5724e-03, PNorm = 182.0571, GNorm = 0.1726, lr_0 = 2.2929e-04
Loss = 5.9089e-03, PNorm = 182.0667, GNorm = 0.2858, lr_0 = 2.2913e-04
Loss = 1.1504e-02, PNorm = 182.0749, GNorm = 0.1748, lr_0 = 2.2898e-04
Loss = 1.1233e-02, PNorm = 182.0840, GNorm = 0.5651, lr_0 = 2.2882e-04
Loss = 1.0219e-02, PNorm = 182.0898, GNorm = 0.2278, lr_0 = 2.2866e-04
Loss = 1.4758e-02, PNorm = 182.0985, GNorm = 0.3141, lr_0 = 2.2851e-04
Loss = 7.5842e-03, PNorm = 182.1061, GNorm = 0.1621, lr_0 = 2.2835e-04
Loss = 7.1770e-03, PNorm = 182.1137, GNorm = 0.1670, lr_0 = 2.2819e-04
Loss = 1.0277e-02, PNorm = 182.1233, GNorm = 0.7814, lr_0 = 2.2804e-04
Loss = 7.4081e-03, PNorm = 182.1304, GNorm = 0.4448, lr_0 = 2.2788e-04
Loss = 5.4672e-03, PNorm = 182.1353, GNorm = 0.2916, lr_0 = 2.2773e-04
Loss = 2.3500e-02, PNorm = 182.1419, GNorm = 2.7040, lr_0 = 2.2757e-04
Validation mae = 0.121204
Epoch 20
Loss = 6.1371e-03, PNorm = 182.1499, GNorm = 0.1147, lr_0 = 2.2741e-04
Loss = 7.4422e-03, PNorm = 182.1541, GNorm = 0.2380, lr_0 = 2.2726e-04
Loss = 4.6149e-03, PNorm = 182.1609, GNorm = 0.1136, lr_0 = 2.2710e-04
Loss = 6.9329e-03, PNorm = 182.1680, GNorm = 0.1192, lr_0 = 2.2695e-04
Loss = 4.4302e-03, PNorm = 182.1723, GNorm = 0.1615, lr_0 = 2.2679e-04
Loss = 8.8384e-03, PNorm = 182.1783, GNorm = 0.3786, lr_0 = 2.2664e-04
Loss = 1.0680e-02, PNorm = 182.1829, GNorm = 0.3089, lr_0 = 2.2648e-04
Loss = 6.9518e-03, PNorm = 182.1862, GNorm = 0.0926, lr_0 = 2.2632e-04
Loss = 6.7971e-03, PNorm = 182.1930, GNorm = 0.3126, lr_0 = 2.2617e-04
Loss = 5.0500e-03, PNorm = 182.1993, GNorm = 0.1095, lr_0 = 2.2601e-04
Loss = 1.1433e-02, PNorm = 182.2038, GNorm = 0.3366, lr_0 = 2.2586e-04
Loss = 4.6876e-03, PNorm = 182.2119, GNorm = 0.1104, lr_0 = 2.2571e-04
Loss = 5.1101e-03, PNorm = 182.2167, GNorm = 0.2753, lr_0 = 2.2555e-04
Loss = 9.0667e-03, PNorm = 182.2221, GNorm = 0.1894, lr_0 = 2.2540e-04
Loss = 4.5785e-03, PNorm = 182.2279, GNorm = 0.0908, lr_0 = 2.2524e-04
Loss = 7.4878e-03, PNorm = 182.2321, GNorm = 0.1777, lr_0 = 2.2509e-04
Loss = 4.5445e-03, PNorm = 182.2364, GNorm = 0.3110, lr_0 = 2.2493e-04
Loss = 7.0320e-03, PNorm = 182.2433, GNorm = 0.1357, lr_0 = 2.2478e-04
Loss = 5.0156e-03, PNorm = 182.2482, GNorm = 0.1406, lr_0 = 2.2463e-04
Loss = 6.7781e-03, PNorm = 182.2516, GNorm = 0.1479, lr_0 = 2.2447e-04
Loss = 4.0597e-03, PNorm = 182.2549, GNorm = 0.1098, lr_0 = 2.2432e-04
Loss = 5.0372e-03, PNorm = 182.2588, GNorm = 0.1896, lr_0 = 2.2416e-04
Loss = 5.0620e-03, PNorm = 182.2634, GNorm = 0.1273, lr_0 = 2.2401e-04
Loss = 5.2484e-03, PNorm = 182.2659, GNorm = 0.1982, lr_0 = 2.2386e-04
Loss = 4.6555e-03, PNorm = 182.2710, GNorm = 0.2117, lr_0 = 2.2370e-04
Loss = 7.5032e-03, PNorm = 182.2767, GNorm = 0.2103, lr_0 = 2.2355e-04
Loss = 4.4727e-03, PNorm = 182.2806, GNorm = 0.2110, lr_0 = 2.2340e-04
Loss = 5.1996e-03, PNorm = 182.2860, GNorm = 0.0944, lr_0 = 2.2324e-04
Loss = 5.9957e-03, PNorm = 182.2920, GNorm = 0.1751, lr_0 = 2.2309e-04
Loss = 4.6631e-03, PNorm = 182.2968, GNorm = 0.2894, lr_0 = 2.2294e-04
Loss = 5.0088e-03, PNorm = 182.3026, GNorm = 0.0775, lr_0 = 2.2279e-04
Loss = 7.1564e-03, PNorm = 182.3077, GNorm = 0.1227, lr_0 = 2.2263e-04
Loss = 4.4017e-03, PNorm = 182.3133, GNorm = 0.1290, lr_0 = 2.2248e-04
Loss = 6.6075e-03, PNorm = 182.3202, GNorm = 0.1627, lr_0 = 2.2233e-04
Loss = 4.1952e-03, PNorm = 182.3263, GNorm = 0.3030, lr_0 = 2.2218e-04
Loss = 6.7006e-03, PNorm = 182.3333, GNorm = 0.2228, lr_0 = 2.2202e-04
Loss = 7.3928e-03, PNorm = 182.3378, GNorm = 0.1378, lr_0 = 2.2187e-04
Loss = 8.6556e-03, PNorm = 182.3451, GNorm = 0.1349, lr_0 = 2.2172e-04
Loss = 4.5003e-03, PNorm = 182.3507, GNorm = 0.3333, lr_0 = 2.2157e-04
Loss = 4.2372e-03, PNorm = 182.3562, GNorm = 0.1931, lr_0 = 2.2142e-04
Loss = 4.7727e-03, PNorm = 182.3601, GNorm = 0.1516, lr_0 = 2.2126e-04
Loss = 6.0140e-03, PNorm = 182.3638, GNorm = 0.2161, lr_0 = 2.2111e-04
Loss = 8.6297e-03, PNorm = 182.3708, GNorm = 0.1250, lr_0 = 2.2096e-04
Loss = 4.4683e-03, PNorm = 182.3764, GNorm = 0.1046, lr_0 = 2.2081e-04
Loss = 4.9355e-03, PNorm = 182.3833, GNorm = 0.1568, lr_0 = 2.2066e-04
Loss = 4.6551e-03, PNorm = 182.3904, GNorm = 0.0871, lr_0 = 2.2051e-04
Loss = 4.7946e-03, PNorm = 182.3947, GNorm = 0.1802, lr_0 = 2.2036e-04
Loss = 5.2440e-03, PNorm = 182.3995, GNorm = 0.1526, lr_0 = 2.2021e-04
Loss = 5.5693e-03, PNorm = 182.4040, GNorm = 0.2712, lr_0 = 2.2005e-04
Loss = 3.7698e-03, PNorm = 182.4055, GNorm = 0.4365, lr_0 = 2.1990e-04
Loss = 4.1383e-03, PNorm = 182.4108, GNorm = 0.1594, lr_0 = 2.1975e-04
Loss = 5.8271e-03, PNorm = 182.4146, GNorm = 0.0907, lr_0 = 2.1960e-04
Loss = 4.9324e-03, PNorm = 182.4202, GNorm = 0.1847, lr_0 = 2.1945e-04
Loss = 4.9355e-03, PNorm = 182.4282, GNorm = 0.3691, lr_0 = 2.1930e-04
Loss = 1.0206e-02, PNorm = 182.4317, GNorm = 0.1424, lr_0 = 2.1915e-04
Loss = 8.7771e-03, PNorm = 182.4374, GNorm = 0.1369, lr_0 = 2.1900e-04
Loss = 8.4572e-03, PNorm = 182.4429, GNorm = 0.2514, lr_0 = 2.1885e-04
Loss = 6.9436e-03, PNorm = 182.4494, GNorm = 0.1601, lr_0 = 2.1870e-04
Loss = 1.0537e-02, PNorm = 182.4560, GNorm = 0.1028, lr_0 = 2.1855e-04
Loss = 7.3303e-03, PNorm = 182.4603, GNorm = 0.0934, lr_0 = 2.1840e-04
Loss = 6.5699e-03, PNorm = 182.4641, GNorm = 0.1167, lr_0 = 2.1825e-04
Loss = 7.6596e-03, PNorm = 182.4676, GNorm = 0.2543, lr_0 = 2.1810e-04
Loss = 9.6910e-03, PNorm = 182.4727, GNorm = 0.1229, lr_0 = 2.1795e-04
Loss = 5.2640e-03, PNorm = 182.4762, GNorm = 0.2340, lr_0 = 2.1780e-04
Loss = 4.9077e-03, PNorm = 182.4810, GNorm = 0.2659, lr_0 = 2.1765e-04
Loss = 6.5643e-03, PNorm = 182.4858, GNorm = 0.3190, lr_0 = 2.1751e-04
Loss = 6.6273e-03, PNorm = 182.4897, GNorm = 0.1501, lr_0 = 2.1736e-04
Loss = 7.2928e-03, PNorm = 182.4932, GNorm = 0.1985, lr_0 = 2.1721e-04
Loss = 6.2025e-03, PNorm = 182.5022, GNorm = 0.2217, lr_0 = 2.1706e-04
Loss = 5.2844e-03, PNorm = 182.5100, GNorm = 0.0872, lr_0 = 2.1691e-04
Loss = 4.8320e-03, PNorm = 182.5165, GNorm = 0.1775, lr_0 = 2.1676e-04
Loss = 5.3433e-03, PNorm = 182.5226, GNorm = 0.1318, lr_0 = 2.1661e-04
Loss = 5.4161e-03, PNorm = 182.5288, GNorm = 0.1658, lr_0 = 2.1646e-04
Loss = 4.4435e-03, PNorm = 182.5341, GNorm = 0.2440, lr_0 = 2.1632e-04
Loss = 7.1460e-03, PNorm = 182.5380, GNorm = 0.3073, lr_0 = 2.1617e-04
Loss = 7.0556e-03, PNorm = 182.5452, GNorm = 0.5563, lr_0 = 2.1602e-04
Loss = 5.5199e-03, PNorm = 182.5523, GNorm = 0.1124, lr_0 = 2.1587e-04
Loss = 7.9120e-03, PNorm = 182.5559, GNorm = 0.0798, lr_0 = 2.1572e-04
Loss = 5.4814e-03, PNorm = 182.5603, GNorm = 0.1389, lr_0 = 2.1558e-04
Loss = 7.1471e-03, PNorm = 182.5666, GNorm = 0.1968, lr_0 = 2.1543e-04
Loss = 3.9705e-03, PNorm = 182.5748, GNorm = 0.0907, lr_0 = 2.1528e-04
Loss = 5.0290e-03, PNorm = 182.5841, GNorm = 0.1306, lr_0 = 2.1513e-04
Loss = 3.8275e-03, PNorm = 182.5887, GNorm = 0.0976, lr_0 = 2.1499e-04
Loss = 4.1057e-03, PNorm = 182.5943, GNorm = 0.1069, lr_0 = 2.1484e-04
Loss = 1.2493e-02, PNorm = 182.6000, GNorm = 0.2830, lr_0 = 2.1469e-04
Loss = 6.7339e-03, PNorm = 182.6036, GNorm = 0.1200, lr_0 = 2.1454e-04
Loss = 3.5865e-03, PNorm = 182.6091, GNorm = 0.1034, lr_0 = 2.1440e-04
Loss = 3.8450e-03, PNorm = 182.6142, GNorm = 0.2252, lr_0 = 2.1425e-04
Loss = 4.6402e-03, PNorm = 182.6186, GNorm = 0.2288, lr_0 = 2.1410e-04
Loss = 4.1841e-03, PNorm = 182.6233, GNorm = 0.1621, lr_0 = 2.1396e-04
Loss = 7.2098e-03, PNorm = 182.6293, GNorm = 0.3265, lr_0 = 2.1381e-04
Loss = 8.9439e-03, PNorm = 182.6350, GNorm = 0.3625, lr_0 = 2.1366e-04
Loss = 3.7153e-03, PNorm = 182.6398, GNorm = 0.1792, lr_0 = 2.1352e-04
Loss = 6.9588e-03, PNorm = 182.6446, GNorm = 0.2471, lr_0 = 2.1337e-04
Loss = 9.7926e-03, PNorm = 182.6493, GNorm = 0.1518, lr_0 = 2.1323e-04
Loss = 5.2762e-03, PNorm = 182.6560, GNorm = 0.2290, lr_0 = 2.1308e-04
Loss = 6.8108e-03, PNorm = 182.6625, GNorm = 0.1016, lr_0 = 2.1293e-04
Loss = 6.8915e-03, PNorm = 182.6679, GNorm = 0.1537, lr_0 = 2.1279e-04
Loss = 5.5312e-03, PNorm = 182.6726, GNorm = 0.2942, lr_0 = 2.1264e-04
Loss = 5.1779e-03, PNorm = 182.6769, GNorm = 0.2105, lr_0 = 2.1250e-04
Loss = 5.6470e-03, PNorm = 182.6809, GNorm = 0.1401, lr_0 = 2.1235e-04
Loss = 3.9161e-03, PNorm = 182.6854, GNorm = 0.1044, lr_0 = 2.1221e-04
Loss = 4.1132e-03, PNorm = 182.6897, GNorm = 0.0942, lr_0 = 2.1206e-04
Loss = 1.6399e-02, PNorm = 182.6953, GNorm = 0.1591, lr_0 = 2.1191e-04
Loss = 6.0885e-03, PNorm = 182.6989, GNorm = 0.2450, lr_0 = 2.1177e-04
Loss = 9.8637e-03, PNorm = 182.7020, GNorm = 0.2370, lr_0 = 2.1162e-04
Loss = 9.4725e-03, PNorm = 182.7121, GNorm = 0.2685, lr_0 = 2.1148e-04
Loss = 4.5950e-03, PNorm = 182.7173, GNorm = 0.1334, lr_0 = 2.1133e-04
Loss = 4.9109e-03, PNorm = 182.7233, GNorm = 0.1509, lr_0 = 2.1119e-04
Loss = 7.6783e-03, PNorm = 182.7300, GNorm = 0.2203, lr_0 = 2.1104e-04
Loss = 6.5301e-03, PNorm = 182.7347, GNorm = 0.0604, lr_0 = 2.1090e-04
Loss = 8.4002e-03, PNorm = 182.7388, GNorm = 0.2204, lr_0 = 2.1076e-04
Loss = 1.6012e-02, PNorm = 182.7437, GNorm = 0.2859, lr_0 = 2.1061e-04
Loss = 5.2644e-03, PNorm = 182.7526, GNorm = 0.1667, lr_0 = 2.1047e-04
Loss = 4.1740e-03, PNorm = 182.7565, GNorm = 0.1749, lr_0 = 2.1032e-04
Loss = 1.3688e-02, PNorm = 182.7628, GNorm = 0.1541, lr_0 = 2.1018e-04
Loss = 9.5810e-03, PNorm = 182.7697, GNorm = 0.1687, lr_0 = 2.1003e-04
Loss = 3.6487e-03, PNorm = 182.7764, GNorm = 0.1554, lr_0 = 2.0989e-04
Loss = 5.7188e-03, PNorm = 182.7822, GNorm = 0.2366, lr_0 = 2.0975e-04
Loss = 5.4128e-03, PNorm = 182.7892, GNorm = 0.1049, lr_0 = 2.0960e-04
Validation mae = 0.121341
Epoch 21
Loss = 4.3966e-03, PNorm = 182.7949, GNorm = 0.2491, lr_0 = 2.0946e-04
Loss = 6.3241e-03, PNorm = 182.8014, GNorm = 0.0804, lr_0 = 2.0932e-04
Loss = 6.0187e-03, PNorm = 182.8061, GNorm = 0.1822, lr_0 = 2.0917e-04
Loss = 3.9844e-03, PNorm = 182.8114, GNorm = 0.1052, lr_0 = 2.0903e-04
Loss = 4.6643e-03, PNorm = 182.8156, GNorm = 0.1428, lr_0 = 2.0889e-04
Loss = 6.9133e-03, PNorm = 182.8197, GNorm = 0.1761, lr_0 = 2.0874e-04
Loss = 4.2860e-03, PNorm = 182.8238, GNorm = 0.1421, lr_0 = 2.0860e-04
Loss = 7.7484e-03, PNorm = 182.8282, GNorm = 0.4278, lr_0 = 2.0846e-04
Loss = 3.6635e-03, PNorm = 182.8324, GNorm = 0.1680, lr_0 = 2.0831e-04
Loss = 4.0118e-03, PNorm = 182.8379, GNorm = 0.1936, lr_0 = 2.0817e-04
Loss = 5.4661e-03, PNorm = 182.8427, GNorm = 0.2208, lr_0 = 2.0803e-04
Loss = 5.4989e-03, PNorm = 182.8463, GNorm = 0.4012, lr_0 = 2.0789e-04
Loss = 4.2666e-03, PNorm = 182.8486, GNorm = 0.1289, lr_0 = 2.0774e-04
Loss = 7.7545e-03, PNorm = 182.8515, GNorm = 0.1761, lr_0 = 2.0760e-04
Loss = 7.1956e-03, PNorm = 182.8587, GNorm = 0.2322, lr_0 = 2.0746e-04
Loss = 3.6774e-03, PNorm = 182.8640, GNorm = 0.1218, lr_0 = 2.0732e-04
Loss = 3.9993e-03, PNorm = 182.8681, GNorm = 0.1758, lr_0 = 2.0718e-04
Loss = 5.1494e-03, PNorm = 182.8729, GNorm = 0.5509, lr_0 = 2.0703e-04
Loss = 6.7167e-03, PNorm = 182.8771, GNorm = 0.1635, lr_0 = 2.0689e-04
Loss = 3.7616e-03, PNorm = 182.8822, GNorm = 0.1737, lr_0 = 2.0675e-04
Loss = 3.5571e-03, PNorm = 182.8869, GNorm = 0.0916, lr_0 = 2.0661e-04
Loss = 3.2658e-03, PNorm = 182.8899, GNorm = 0.2283, lr_0 = 2.0647e-04
Loss = 7.5423e-03, PNorm = 182.8931, GNorm = 0.1943, lr_0 = 2.0633e-04
Loss = 3.4791e-03, PNorm = 182.8971, GNorm = 0.0703, lr_0 = 2.0618e-04
Loss = 6.4943e-03, PNorm = 182.9026, GNorm = 0.1104, lr_0 = 2.0604e-04
Loss = 5.1900e-03, PNorm = 182.9073, GNorm = 0.1341, lr_0 = 2.0590e-04
Loss = 5.9373e-03, PNorm = 182.9106, GNorm = 0.0940, lr_0 = 2.0576e-04
Loss = 8.2809e-03, PNorm = 182.9153, GNorm = 0.1861, lr_0 = 2.0562e-04
Loss = 4.3938e-03, PNorm = 182.9195, GNorm = 0.0888, lr_0 = 2.0548e-04
Loss = 4.9524e-03, PNorm = 182.9232, GNorm = 0.1691, lr_0 = 2.0534e-04
Loss = 5.3390e-03, PNorm = 182.9280, GNorm = 0.1845, lr_0 = 2.0520e-04
Loss = 4.8002e-03, PNorm = 182.9324, GNorm = 0.0684, lr_0 = 2.0506e-04
Loss = 4.1560e-03, PNorm = 182.9368, GNorm = 0.4188, lr_0 = 2.0492e-04
Loss = 3.3982e-03, PNorm = 182.9414, GNorm = 0.1362, lr_0 = 2.0478e-04
Loss = 4.7390e-03, PNorm = 182.9462, GNorm = 0.4457, lr_0 = 2.0464e-04
Loss = 3.6107e-03, PNorm = 182.9497, GNorm = 0.1349, lr_0 = 2.0450e-04
Loss = 7.0588e-03, PNorm = 182.9528, GNorm = 0.0832, lr_0 = 2.0436e-04
Loss = 5.1651e-03, PNorm = 182.9569, GNorm = 0.0962, lr_0 = 2.0422e-04
Loss = 4.5687e-03, PNorm = 182.9609, GNorm = 0.1406, lr_0 = 2.0408e-04
Loss = 3.6259e-03, PNorm = 182.9663, GNorm = 0.0960, lr_0 = 2.0394e-04
Loss = 6.2827e-03, PNorm = 182.9689, GNorm = 0.1080, lr_0 = 2.0380e-04
Loss = 6.0934e-03, PNorm = 182.9707, GNorm = 0.1929, lr_0 = 2.0366e-04
Loss = 6.2889e-03, PNorm = 182.9757, GNorm = 0.1004, lr_0 = 2.0352e-04
Loss = 5.6853e-03, PNorm = 182.9808, GNorm = 0.1492, lr_0 = 2.0338e-04
Loss = 4.4777e-03, PNorm = 182.9852, GNorm = 0.1092, lr_0 = 2.0324e-04
Loss = 1.1336e-02, PNorm = 182.9897, GNorm = 2.1990, lr_0 = 2.0310e-04
Loss = 8.0203e-03, PNorm = 182.9926, GNorm = 0.2894, lr_0 = 2.0296e-04
Loss = 5.5835e-03, PNorm = 182.9981, GNorm = 0.2270, lr_0 = 2.0282e-04
Loss = 8.1408e-03, PNorm = 183.0029, GNorm = 0.2598, lr_0 = 2.0268e-04
Loss = 3.6455e-03, PNorm = 183.0080, GNorm = 0.1025, lr_0 = 2.0254e-04
Loss = 4.9063e-03, PNorm = 183.0132, GNorm = 0.1643, lr_0 = 2.0240e-04
Loss = 6.1444e-03, PNorm = 183.0158, GNorm = 0.1294, lr_0 = 2.0227e-04
Loss = 3.8578e-03, PNorm = 183.0208, GNorm = 0.1756, lr_0 = 2.0213e-04
Loss = 4.9593e-03, PNorm = 183.0280, GNorm = 0.1208, lr_0 = 2.0199e-04
Loss = 5.0907e-03, PNorm = 183.0324, GNorm = 0.1073, lr_0 = 2.0185e-04
Loss = 3.7211e-03, PNorm = 183.0365, GNorm = 0.1141, lr_0 = 2.0171e-04
Loss = 3.9266e-03, PNorm = 183.0410, GNorm = 0.1406, lr_0 = 2.0157e-04
Loss = 4.5136e-03, PNorm = 183.0433, GNorm = 0.0971, lr_0 = 2.0144e-04
Loss = 3.4983e-03, PNorm = 183.0476, GNorm = 0.0816, lr_0 = 2.0130e-04
Loss = 7.7563e-03, PNorm = 183.0546, GNorm = 0.0969, lr_0 = 2.0116e-04
Loss = 3.7108e-03, PNorm = 183.0616, GNorm = 0.1701, lr_0 = 2.0102e-04
Loss = 3.3950e-03, PNorm = 183.0660, GNorm = 0.2457, lr_0 = 2.0088e-04
Loss = 4.2925e-03, PNorm = 183.0683, GNorm = 0.1947, lr_0 = 2.0075e-04
Loss = 6.8838e-03, PNorm = 183.0719, GNorm = 0.1893, lr_0 = 2.0061e-04
Loss = 6.3446e-03, PNorm = 183.0753, GNorm = 0.1285, lr_0 = 2.0047e-04
Loss = 1.0138e-02, PNorm = 183.0788, GNorm = 0.2507, lr_0 = 2.0033e-04
Loss = 3.3719e-03, PNorm = 183.0848, GNorm = 0.1095, lr_0 = 2.0020e-04
Loss = 7.8580e-03, PNorm = 183.0896, GNorm = 0.0830, lr_0 = 2.0006e-04
Loss = 4.8285e-03, PNorm = 183.0944, GNorm = 0.1612, lr_0 = 1.9992e-04
Loss = 3.7785e-03, PNorm = 183.0996, GNorm = 0.2186, lr_0 = 1.9979e-04
Loss = 5.3723e-03, PNorm = 183.1031, GNorm = 0.0892, lr_0 = 1.9965e-04
Loss = 5.3130e-03, PNorm = 183.1069, GNorm = 0.0774, lr_0 = 1.9951e-04
Loss = 6.5313e-03, PNorm = 183.1115, GNorm = 0.2192, lr_0 = 1.9938e-04
Loss = 4.9411e-03, PNorm = 183.1163, GNorm = 0.2072, lr_0 = 1.9924e-04
Loss = 8.6275e-03, PNorm = 183.1196, GNorm = 0.1992, lr_0 = 1.9910e-04
Loss = 3.8621e-03, PNorm = 183.1232, GNorm = 0.2184, lr_0 = 1.9897e-04
Loss = 7.0158e-03, PNorm = 183.1247, GNorm = 0.1191, lr_0 = 1.9883e-04
Loss = 1.8957e-02, PNorm = 183.1306, GNorm = 0.1681, lr_0 = 1.9869e-04
Loss = 4.7148e-03, PNorm = 183.1333, GNorm = 0.2082, lr_0 = 1.9856e-04
Loss = 5.5929e-03, PNorm = 183.1382, GNorm = 0.2584, lr_0 = 1.9842e-04
Loss = 8.0502e-03, PNorm = 183.1448, GNorm = 0.2109, lr_0 = 1.9829e-04
Loss = 4.0114e-03, PNorm = 183.1502, GNorm = 0.0892, lr_0 = 1.9815e-04
Loss = 5.8362e-03, PNorm = 183.1537, GNorm = 0.3765, lr_0 = 1.9801e-04
Loss = 5.1865e-03, PNorm = 183.1583, GNorm = 0.1407, lr_0 = 1.9788e-04
Loss = 3.7628e-03, PNorm = 183.1654, GNorm = 0.1125, lr_0 = 1.9774e-04
Loss = 5.6742e-03, PNorm = 183.1712, GNorm = 0.1837, lr_0 = 1.9761e-04
Loss = 7.2846e-03, PNorm = 183.1751, GNorm = 1.4078, lr_0 = 1.9747e-04
Loss = 6.4606e-03, PNorm = 183.1793, GNorm = 0.1227, lr_0 = 1.9734e-04
Loss = 3.6039e-03, PNorm = 183.1835, GNorm = 0.1936, lr_0 = 1.9720e-04
Loss = 3.7759e-03, PNorm = 183.1885, GNorm = 0.1214, lr_0 = 1.9707e-04
Loss = 6.6423e-03, PNorm = 183.1905, GNorm = 0.3145, lr_0 = 1.9693e-04
Loss = 3.1699e-03, PNorm = 183.1915, GNorm = 0.0926, lr_0 = 1.9680e-04
Loss = 4.2443e-03, PNorm = 183.1948, GNorm = 0.0910, lr_0 = 1.9666e-04
Loss = 3.5368e-03, PNorm = 183.1997, GNorm = 0.1424, lr_0 = 1.9653e-04
Loss = 4.0037e-03, PNorm = 183.2048, GNorm = 0.2914, lr_0 = 1.9639e-04
Loss = 8.0982e-03, PNorm = 183.2089, GNorm = 0.0666, lr_0 = 1.9626e-04
Loss = 9.0833e-03, PNorm = 183.2117, GNorm = 0.1561, lr_0 = 1.9612e-04
Loss = 4.1064e-03, PNorm = 183.2147, GNorm = 0.1413, lr_0 = 1.9599e-04
Loss = 4.1729e-03, PNorm = 183.2183, GNorm = 0.2626, lr_0 = 1.9585e-04
Loss = 1.0919e-02, PNorm = 183.2216, GNorm = 0.2245, lr_0 = 1.9572e-04
Loss = 5.4072e-03, PNorm = 183.2266, GNorm = 0.0977, lr_0 = 1.9559e-04
Loss = 1.0919e-02, PNorm = 183.2325, GNorm = 0.3997, lr_0 = 1.9545e-04
Loss = 8.5792e-03, PNorm = 183.2379, GNorm = 0.1975, lr_0 = 1.9532e-04
Loss = 9.0925e-03, PNorm = 183.2439, GNorm = 0.2759, lr_0 = 1.9518e-04
Loss = 4.8544e-03, PNorm = 183.2511, GNorm = 0.0868, lr_0 = 1.9505e-04
Loss = 4.4056e-03, PNorm = 183.2583, GNorm = 0.3500, lr_0 = 1.9492e-04
Loss = 1.1574e-02, PNorm = 183.2650, GNorm = 0.1490, lr_0 = 1.9478e-04
Loss = 6.3192e-03, PNorm = 183.2723, GNorm = 0.1249, lr_0 = 1.9465e-04
Loss = 3.8581e-03, PNorm = 183.2783, GNorm = 0.0780, lr_0 = 1.9452e-04
Loss = 5.2437e-03, PNorm = 183.2830, GNorm = 0.1131, lr_0 = 1.9438e-04
Loss = 4.1304e-03, PNorm = 183.2879, GNorm = 0.0968, lr_0 = 1.9425e-04
Loss = 3.2390e-03, PNorm = 183.2922, GNorm = 0.1194, lr_0 = 1.9412e-04
Loss = 8.1247e-03, PNorm = 183.2965, GNorm = 0.1034, lr_0 = 1.9398e-04
Loss = 2.9921e-03, PNorm = 183.2994, GNorm = 0.0966, lr_0 = 1.9385e-04
Loss = 9.3994e-03, PNorm = 183.3037, GNorm = 0.4899, lr_0 = 1.9372e-04
Loss = 5.9367e-03, PNorm = 183.3072, GNorm = 0.3875, lr_0 = 1.9359e-04
Loss = 5.9801e-03, PNorm = 183.3120, GNorm = 0.2864, lr_0 = 1.9345e-04
Loss = 6.1626e-03, PNorm = 183.3156, GNorm = 0.1180, lr_0 = 1.9332e-04
Loss = 3.4103e-03, PNorm = 183.3205, GNorm = 0.0886, lr_0 = 1.9319e-04
Loss = 1.1232e-02, PNorm = 183.3246, GNorm = 0.2181, lr_0 = 1.9306e-04
Validation mae = 0.121095
Epoch 22
Loss = 6.5019e-03, PNorm = 183.3299, GNorm = 0.3058, lr_0 = 1.9292e-04
Loss = 4.0645e-03, PNorm = 183.3353, GNorm = 0.0871, lr_0 = 1.9279e-04
Loss = 5.8423e-03, PNorm = 183.3368, GNorm = 0.0819, lr_0 = 1.9266e-04
Loss = 3.4312e-03, PNorm = 183.3392, GNorm = 0.1354, lr_0 = 1.9253e-04
Loss = 7.0861e-03, PNorm = 183.3431, GNorm = 0.1063, lr_0 = 1.9240e-04
Loss = 8.0763e-03, PNorm = 183.3470, GNorm = 0.1586, lr_0 = 1.9226e-04
Loss = 5.7827e-03, PNorm = 183.3523, GNorm = 0.0788, lr_0 = 1.9213e-04
Loss = 7.0657e-03, PNorm = 183.3552, GNorm = 0.1143, lr_0 = 1.9200e-04
Loss = 4.0121e-03, PNorm = 183.3587, GNorm = 0.0960, lr_0 = 1.9187e-04
Loss = 4.3469e-03, PNorm = 183.3623, GNorm = 0.2168, lr_0 = 1.9174e-04
Loss = 4.7282e-03, PNorm = 183.3657, GNorm = 0.2146, lr_0 = 1.9161e-04
Loss = 4.2065e-03, PNorm = 183.3695, GNorm = 0.1098, lr_0 = 1.9148e-04
Loss = 3.6448e-03, PNorm = 183.3731, GNorm = 0.1116, lr_0 = 1.9134e-04
Loss = 3.9287e-03, PNorm = 183.3756, GNorm = 0.1238, lr_0 = 1.9121e-04
Loss = 2.7225e-03, PNorm = 183.3783, GNorm = 0.1327, lr_0 = 1.9108e-04
Loss = 5.3946e-03, PNorm = 183.3823, GNorm = 0.1073, lr_0 = 1.9095e-04
Loss = 3.2455e-03, PNorm = 183.3878, GNorm = 0.1903, lr_0 = 1.9082e-04
Loss = 2.8685e-03, PNorm = 183.3907, GNorm = 0.0921, lr_0 = 1.9069e-04
Loss = 4.6414e-03, PNorm = 183.3930, GNorm = 0.1051, lr_0 = 1.9056e-04
Loss = 5.3636e-03, PNorm = 183.3955, GNorm = 0.1311, lr_0 = 1.9043e-04
Loss = 3.6730e-03, PNorm = 183.3978, GNorm = 0.1344, lr_0 = 1.9030e-04
Loss = 6.8776e-03, PNorm = 183.4008, GNorm = 0.1284, lr_0 = 1.9017e-04
Loss = 3.4454e-03, PNorm = 183.4045, GNorm = 0.0659, lr_0 = 1.9004e-04
Loss = 3.1035e-03, PNorm = 183.4076, GNorm = 0.1639, lr_0 = 1.8991e-04
Loss = 3.2040e-03, PNorm = 183.4092, GNorm = 0.1957, lr_0 = 1.8978e-04
Loss = 5.7194e-03, PNorm = 183.4117, GNorm = 0.1687, lr_0 = 1.8965e-04
Loss = 4.6376e-03, PNorm = 183.4160, GNorm = 0.2154, lr_0 = 1.8952e-04
Loss = 2.8890e-03, PNorm = 183.4212, GNorm = 0.1028, lr_0 = 1.8939e-04
Loss = 5.1340e-03, PNorm = 183.4249, GNorm = 0.1125, lr_0 = 1.8926e-04
Loss = 4.7765e-03, PNorm = 183.4281, GNorm = 0.0750, lr_0 = 1.8913e-04
Loss = 8.4829e-03, PNorm = 183.4323, GNorm = 0.1511, lr_0 = 1.8900e-04
Loss = 2.6993e-03, PNorm = 183.4357, GNorm = 0.1649, lr_0 = 1.8887e-04
Loss = 3.7012e-03, PNorm = 183.4389, GNorm = 0.1158, lr_0 = 1.8874e-04
Loss = 5.3185e-03, PNorm = 183.4445, GNorm = 0.1021, lr_0 = 1.8861e-04
Loss = 4.5586e-03, PNorm = 183.4488, GNorm = 0.1780, lr_0 = 1.8848e-04
Loss = 3.1484e-03, PNorm = 183.4523, GNorm = 0.1233, lr_0 = 1.8835e-04
Loss = 5.4740e-03, PNorm = 183.4581, GNorm = 0.2054, lr_0 = 1.8822e-04
Loss = 6.7381e-03, PNorm = 183.4609, GNorm = 0.1949, lr_0 = 1.8809e-04
Loss = 2.7609e-03, PNorm = 183.4646, GNorm = 0.0759, lr_0 = 1.8797e-04
Loss = 3.0343e-03, PNorm = 183.4682, GNorm = 0.0805, lr_0 = 1.8784e-04
Loss = 8.4207e-03, PNorm = 183.4733, GNorm = 0.1202, lr_0 = 1.8771e-04
Loss = 5.3632e-03, PNorm = 183.4756, GNorm = 0.2520, lr_0 = 1.8758e-04
Loss = 4.1616e-03, PNorm = 183.4798, GNorm = 0.2216, lr_0 = 1.8745e-04
Loss = 6.6414e-03, PNorm = 183.4833, GNorm = 0.0948, lr_0 = 1.8732e-04
Loss = 9.1169e-03, PNorm = 183.4869, GNorm = 0.2279, lr_0 = 1.8719e-04
Loss = 5.1069e-03, PNorm = 183.4876, GNorm = 0.0864, lr_0 = 1.8707e-04
Loss = 4.0430e-03, PNorm = 183.4898, GNorm = 0.2934, lr_0 = 1.8694e-04
Loss = 7.3564e-03, PNorm = 183.4941, GNorm = 0.1735, lr_0 = 1.8681e-04
Loss = 7.6380e-03, PNorm = 183.4976, GNorm = 0.6334, lr_0 = 1.8668e-04
Loss = 4.2370e-03, PNorm = 183.5016, GNorm = 0.3280, lr_0 = 1.8655e-04
Loss = 4.8732e-03, PNorm = 183.5071, GNorm = 0.1592, lr_0 = 1.8643e-04
Loss = 4.5949e-03, PNorm = 183.5125, GNorm = 0.2676, lr_0 = 1.8630e-04
Loss = 7.7868e-03, PNorm = 183.5176, GNorm = 0.1128, lr_0 = 1.8617e-04
Loss = 3.5514e-03, PNorm = 183.5236, GNorm = 0.1208, lr_0 = 1.8604e-04
Loss = 3.1158e-03, PNorm = 183.5266, GNorm = 0.0969, lr_0 = 1.8592e-04
Loss = 3.9639e-03, PNorm = 183.5296, GNorm = 0.1124, lr_0 = 1.8579e-04
Loss = 4.3527e-03, PNorm = 183.5329, GNorm = 0.1176, lr_0 = 1.8566e-04
Loss = 4.8746e-03, PNorm = 183.5350, GNorm = 0.1244, lr_0 = 1.8553e-04
Loss = 6.6703e-03, PNorm = 183.5372, GNorm = 0.1762, lr_0 = 1.8541e-04
Loss = 4.5561e-03, PNorm = 183.5410, GNorm = 0.1179, lr_0 = 1.8528e-04
Loss = 6.4103e-03, PNorm = 183.5454, GNorm = 0.0586, lr_0 = 1.8515e-04
Loss = 3.9447e-03, PNorm = 183.5482, GNorm = 0.0709, lr_0 = 1.8503e-04
Loss = 5.8198e-03, PNorm = 183.5502, GNorm = 0.1722, lr_0 = 1.8490e-04
Loss = 3.6547e-03, PNorm = 183.5541, GNorm = 0.2636, lr_0 = 1.8477e-04
Loss = 4.2757e-03, PNorm = 183.5579, GNorm = 0.0837, lr_0 = 1.8465e-04
Loss = 4.5586e-03, PNorm = 183.5613, GNorm = 0.1851, lr_0 = 1.8452e-04
Loss = 9.6014e-03, PNorm = 183.5629, GNorm = 0.6856, lr_0 = 1.8439e-04
Loss = 5.4282e-03, PNorm = 183.5689, GNorm = 0.1389, lr_0 = 1.8427e-04
Loss = 4.9994e-03, PNorm = 183.5745, GNorm = 0.1418, lr_0 = 1.8414e-04
Loss = 5.9711e-03, PNorm = 183.5790, GNorm = 0.1251, lr_0 = 1.8401e-04
Loss = 5.7960e-03, PNorm = 183.5841, GNorm = 0.1219, lr_0 = 1.8389e-04
Loss = 4.9836e-03, PNorm = 183.5852, GNorm = 0.1288, lr_0 = 1.8376e-04
Loss = 3.9862e-03, PNorm = 183.5902, GNorm = 0.1004, lr_0 = 1.8364e-04
Loss = 3.0163e-03, PNorm = 183.5958, GNorm = 0.0863, lr_0 = 1.8351e-04
Loss = 3.4931e-03, PNorm = 183.6007, GNorm = 0.0818, lr_0 = 1.8338e-04
Loss = 3.7860e-03, PNorm = 183.6058, GNorm = 0.0969, lr_0 = 1.8326e-04
Loss = 6.4811e-03, PNorm = 183.6089, GNorm = 0.0936, lr_0 = 1.8313e-04
Loss = 4.3409e-03, PNorm = 183.6139, GNorm = 0.1006, lr_0 = 1.8301e-04
Loss = 4.3212e-03, PNorm = 183.6187, GNorm = 0.1107, lr_0 = 1.8288e-04
Loss = 8.4357e-03, PNorm = 183.6248, GNorm = 0.1589, lr_0 = 1.8276e-04
Loss = 4.4242e-03, PNorm = 183.6298, GNorm = 0.1298, lr_0 = 1.8263e-04
Loss = 4.1579e-03, PNorm = 183.6337, GNorm = 0.1435, lr_0 = 1.8251e-04
Loss = 3.9302e-03, PNorm = 183.6381, GNorm = 0.1600, lr_0 = 1.8238e-04
Loss = 3.2902e-03, PNorm = 183.6424, GNorm = 0.1271, lr_0 = 1.8226e-04
Loss = 3.9039e-03, PNorm = 183.6464, GNorm = 0.0952, lr_0 = 1.8213e-04
Loss = 6.1616e-03, PNorm = 183.6499, GNorm = 0.3409, lr_0 = 1.8201e-04
Loss = 1.2888e-02, PNorm = 183.6555, GNorm = 0.1479, lr_0 = 1.8188e-04
Loss = 4.0793e-03, PNorm = 183.6605, GNorm = 0.4089, lr_0 = 1.8176e-04
Loss = 3.3659e-03, PNorm = 183.6646, GNorm = 0.2389, lr_0 = 1.8163e-04
Loss = 4.0301e-03, PNorm = 183.6680, GNorm = 0.0958, lr_0 = 1.8151e-04
Loss = 4.7059e-03, PNorm = 183.6710, GNorm = 0.3242, lr_0 = 1.8138e-04
Loss = 9.0055e-03, PNorm = 183.6752, GNorm = 0.5133, lr_0 = 1.8126e-04
Loss = 4.1295e-03, PNorm = 183.6764, GNorm = 0.1414, lr_0 = 1.8114e-04
Loss = 2.7535e-03, PNorm = 183.6807, GNorm = 0.1721, lr_0 = 1.8101e-04
Loss = 3.2866e-03, PNorm = 183.6849, GNorm = 0.1402, lr_0 = 1.8089e-04
Loss = 7.9157e-03, PNorm = 183.6874, GNorm = 0.1354, lr_0 = 1.8076e-04
Loss = 3.4941e-03, PNorm = 183.6915, GNorm = 0.0573, lr_0 = 1.8064e-04
Loss = 5.0336e-03, PNorm = 183.6964, GNorm = 0.1338, lr_0 = 1.8052e-04
Loss = 4.7268e-03, PNorm = 183.7005, GNorm = 0.1109, lr_0 = 1.8039e-04
Loss = 6.4769e-03, PNorm = 183.7036, GNorm = 0.0634, lr_0 = 1.8027e-04
Loss = 3.8507e-03, PNorm = 183.7084, GNorm = 0.1125, lr_0 = 1.8015e-04
Loss = 5.8323e-03, PNorm = 183.7116, GNorm = 0.1591, lr_0 = 1.8002e-04
Loss = 4.3392e-03, PNorm = 183.7153, GNorm = 0.0837, lr_0 = 1.7990e-04
Loss = 1.2743e-02, PNorm = 183.7207, GNorm = 0.6412, lr_0 = 1.7978e-04
Loss = 4.5471e-03, PNorm = 183.7261, GNorm = 0.4146, lr_0 = 1.7965e-04
Loss = 3.0600e-03, PNorm = 183.7297, GNorm = 0.1132, lr_0 = 1.7953e-04
Loss = 2.6218e-03, PNorm = 183.7341, GNorm = 0.0801, lr_0 = 1.7941e-04
Loss = 4.3851e-03, PNorm = 183.7369, GNorm = 0.1030, lr_0 = 1.7928e-04
Loss = 3.9698e-03, PNorm = 183.7413, GNorm = 0.1464, lr_0 = 1.7916e-04
Loss = 7.2658e-03, PNorm = 183.7459, GNorm = 0.0744, lr_0 = 1.7904e-04
Loss = 7.6452e-03, PNorm = 183.7487, GNorm = 0.1929, lr_0 = 1.7892e-04
Loss = 4.2419e-03, PNorm = 183.7528, GNorm = 0.1277, lr_0 = 1.7879e-04
Loss = 2.7926e-03, PNorm = 183.7579, GNorm = 0.0930, lr_0 = 1.7867e-04
Loss = 6.6821e-03, PNorm = 183.7645, GNorm = 0.8038, lr_0 = 1.7855e-04
Loss = 1.9157e-02, PNorm = 183.7712, GNorm = 0.2838, lr_0 = 1.7843e-04
Loss = 4.0903e-03, PNorm = 183.7765, GNorm = 0.1832, lr_0 = 1.7830e-04
Loss = 6.4271e-03, PNorm = 183.7785, GNorm = 0.1078, lr_0 = 1.7818e-04
Loss = 4.2133e-03, PNorm = 183.7827, GNorm = 0.3194, lr_0 = 1.7806e-04
Loss = 3.8948e-03, PNorm = 183.7856, GNorm = 0.2283, lr_0 = 1.7794e-04
Loss = 5.7680e-03, PNorm = 183.7888, GNorm = 0.1578, lr_0 = 1.7782e-04
Validation mae = 0.120976
Epoch 23
Loss = 3.0376e-03, PNorm = 183.7929, GNorm = 0.1026, lr_0 = 1.7769e-04
Loss = 3.6826e-03, PNorm = 183.7951, GNorm = 0.1057, lr_0 = 1.7757e-04
Loss = 4.3007e-03, PNorm = 183.7975, GNorm = 0.1264, lr_0 = 1.7745e-04
Loss = 5.2529e-03, PNorm = 183.8011, GNorm = 0.0861, lr_0 = 1.7733e-04
Loss = 3.1950e-03, PNorm = 183.8046, GNorm = 0.1382, lr_0 = 1.7721e-04
Loss = 4.0542e-03, PNorm = 183.8070, GNorm = 0.0903, lr_0 = 1.7709e-04
Loss = 3.9651e-03, PNorm = 183.8087, GNorm = 0.2803, lr_0 = 1.7696e-04
Loss = 3.6868e-03, PNorm = 183.8101, GNorm = 0.1766, lr_0 = 1.7684e-04
Loss = 6.7224e-03, PNorm = 183.8133, GNorm = 0.0932, lr_0 = 1.7672e-04
Loss = 4.9226e-03, PNorm = 183.8154, GNorm = 0.2708, lr_0 = 1.7660e-04
Loss = 2.7392e-03, PNorm = 183.8187, GNorm = 0.2413, lr_0 = 1.7648e-04
Loss = 2.7488e-03, PNorm = 183.8237, GNorm = 0.0683, lr_0 = 1.7636e-04
Loss = 6.3679e-03, PNorm = 183.8277, GNorm = 0.4738, lr_0 = 1.7624e-04
Loss = 3.8404e-03, PNorm = 183.8299, GNorm = 0.1904, lr_0 = 1.7612e-04
Loss = 3.2779e-03, PNorm = 183.8307, GNorm = 0.0563, lr_0 = 1.7600e-04
Loss = 5.6298e-03, PNorm = 183.8336, GNorm = 0.0757, lr_0 = 1.7588e-04
Loss = 3.5107e-03, PNorm = 183.8375, GNorm = 0.0777, lr_0 = 1.7576e-04
Loss = 2.4019e-03, PNorm = 183.8402, GNorm = 0.0980, lr_0 = 1.7564e-04
Loss = 7.1296e-03, PNorm = 183.8434, GNorm = 0.0550, lr_0 = 1.7552e-04
Loss = 6.0143e-03, PNorm = 183.8459, GNorm = 0.1590, lr_0 = 1.7540e-04
Loss = 5.5358e-03, PNorm = 183.8465, GNorm = 0.3602, lr_0 = 1.7528e-04
Loss = 2.6515e-03, PNorm = 183.8504, GNorm = 0.1311, lr_0 = 1.7516e-04
Loss = 4.1758e-03, PNorm = 183.8525, GNorm = 0.1166, lr_0 = 1.7504e-04
Loss = 4.0522e-03, PNorm = 183.8555, GNorm = 0.1125, lr_0 = 1.7492e-04
Loss = 2.8872e-03, PNorm = 183.8582, GNorm = 0.0994, lr_0 = 1.7480e-04
Loss = 2.7128e-03, PNorm = 183.8613, GNorm = 0.1513, lr_0 = 1.7468e-04
Loss = 2.7661e-03, PNorm = 183.8640, GNorm = 0.0860, lr_0 = 1.7456e-04
Loss = 6.3163e-03, PNorm = 183.8668, GNorm = 0.0873, lr_0 = 1.7444e-04
Loss = 2.5601e-03, PNorm = 183.8694, GNorm = 0.1976, lr_0 = 1.7432e-04
Loss = 4.6751e-03, PNorm = 183.8727, GNorm = 0.1252, lr_0 = 1.7420e-04
Loss = 5.0357e-03, PNorm = 183.8769, GNorm = 0.1037, lr_0 = 1.7408e-04
Loss = 3.0842e-03, PNorm = 183.8815, GNorm = 0.0861, lr_0 = 1.7396e-04
Loss = 3.3219e-03, PNorm = 183.8862, GNorm = 0.1702, lr_0 = 1.7384e-04
Loss = 5.1328e-03, PNorm = 183.8921, GNorm = 0.2432, lr_0 = 1.7372e-04
Loss = 2.9299e-03, PNorm = 183.8943, GNorm = 0.0894, lr_0 = 1.7360e-04
Loss = 3.8512e-03, PNorm = 183.8974, GNorm = 0.2632, lr_0 = 1.7348e-04
Loss = 5.4104e-03, PNorm = 183.8985, GNorm = 0.2528, lr_0 = 1.7336e-04
Loss = 5.8582e-03, PNorm = 183.9012, GNorm = 0.1103, lr_0 = 1.7325e-04
Loss = 2.8731e-03, PNorm = 183.9041, GNorm = 0.1249, lr_0 = 1.7313e-04
Loss = 9.7189e-03, PNorm = 183.9074, GNorm = 0.2267, lr_0 = 1.7301e-04
Loss = 2.4266e-03, PNorm = 183.9126, GNorm = 0.1235, lr_0 = 1.7289e-04
Loss = 2.5994e-03, PNorm = 183.9157, GNorm = 0.1632, lr_0 = 1.7277e-04
Loss = 5.3441e-03, PNorm = 183.9176, GNorm = 0.3595, lr_0 = 1.7265e-04
Loss = 2.2272e-03, PNorm = 183.9197, GNorm = 0.2038, lr_0 = 1.7253e-04
Loss = 2.8522e-03, PNorm = 183.9218, GNorm = 0.1301, lr_0 = 1.7242e-04
Loss = 4.7045e-03, PNorm = 183.9253, GNorm = 0.0853, lr_0 = 1.7230e-04
Loss = 9.9389e-03, PNorm = 183.9288, GNorm = 0.0681, lr_0 = 1.7218e-04
Loss = 6.4290e-03, PNorm = 183.9319, GNorm = 0.1116, lr_0 = 1.7206e-04
Loss = 8.2736e-03, PNorm = 183.9338, GNorm = 0.1302, lr_0 = 1.7194e-04
Loss = 3.4435e-03, PNorm = 183.9361, GNorm = 0.1229, lr_0 = 1.7183e-04
Loss = 4.9297e-03, PNorm = 183.9379, GNorm = 0.1706, lr_0 = 1.7171e-04
Loss = 5.4709e-03, PNorm = 183.9404, GNorm = 0.1899, lr_0 = 1.7159e-04
Loss = 7.1628e-03, PNorm = 183.9452, GNorm = 0.3506, lr_0 = 1.7147e-04
Loss = 3.2058e-03, PNorm = 183.9495, GNorm = 0.2188, lr_0 = 1.7136e-04
Loss = 9.2894e-03, PNorm = 183.9527, GNorm = 0.2522, lr_0 = 1.7124e-04
Loss = 4.6999e-03, PNorm = 183.9550, GNorm = 0.2181, lr_0 = 1.7112e-04
Loss = 1.2757e-02, PNorm = 183.9587, GNorm = 0.3942, lr_0 = 1.7100e-04
Loss = 9.3603e-03, PNorm = 183.9591, GNorm = 0.3151, lr_0 = 1.7089e-04
Loss = 8.9310e-03, PNorm = 183.9621, GNorm = 0.5202, lr_0 = 1.7077e-04
Loss = 6.7867e-03, PNorm = 183.9658, GNorm = 0.5075, lr_0 = 1.7065e-04
Loss = 6.1986e-03, PNorm = 183.9682, GNorm = 0.0696, lr_0 = 1.7054e-04
Loss = 3.8670e-03, PNorm = 183.9722, GNorm = 0.1035, lr_0 = 1.7042e-04
Loss = 2.2730e-03, PNorm = 183.9756, GNorm = 0.1862, lr_0 = 1.7030e-04
Loss = 5.8611e-03, PNorm = 183.9786, GNorm = 0.3427, lr_0 = 1.7019e-04
Loss = 2.8670e-03, PNorm = 183.9812, GNorm = 0.2625, lr_0 = 1.7007e-04
Loss = 3.1608e-03, PNorm = 183.9848, GNorm = 0.1766, lr_0 = 1.6995e-04
Loss = 3.1692e-03, PNorm = 183.9886, GNorm = 0.2128, lr_0 = 1.6984e-04
Loss = 3.1598e-03, PNorm = 183.9917, GNorm = 0.1232, lr_0 = 1.6972e-04
Loss = 4.5684e-03, PNorm = 183.9965, GNorm = 0.2378, lr_0 = 1.6960e-04
Loss = 8.6657e-03, PNorm = 184.0004, GNorm = 1.4378, lr_0 = 1.6949e-04
Loss = 3.3133e-03, PNorm = 184.0033, GNorm = 0.1962, lr_0 = 1.6937e-04
Loss = 7.3203e-03, PNorm = 184.0073, GNorm = 0.1408, lr_0 = 1.6926e-04
Loss = 2.9838e-03, PNorm = 184.0117, GNorm = 0.3507, lr_0 = 1.6914e-04
Loss = 2.7285e-03, PNorm = 184.0171, GNorm = 0.1471, lr_0 = 1.6902e-04
Loss = 2.8662e-03, PNorm = 184.0205, GNorm = 0.1299, lr_0 = 1.6891e-04
Loss = 3.5071e-03, PNorm = 184.0244, GNorm = 0.1897, lr_0 = 1.6879e-04
Loss = 3.5470e-03, PNorm = 184.0291, GNorm = 0.1817, lr_0 = 1.6868e-04
Loss = 4.6948e-03, PNorm = 184.0335, GNorm = 0.0694, lr_0 = 1.6856e-04
Loss = 2.9130e-03, PNorm = 184.0361, GNorm = 0.1797, lr_0 = 1.6845e-04
Loss = 3.9632e-03, PNorm = 184.0382, GNorm = 0.0722, lr_0 = 1.6833e-04
Loss = 2.6333e-03, PNorm = 184.0412, GNorm = 0.1665, lr_0 = 1.6821e-04
Loss = 3.0548e-03, PNorm = 184.0438, GNorm = 0.0615, lr_0 = 1.6810e-04
Loss = 7.4791e-03, PNorm = 184.0458, GNorm = 0.1313, lr_0 = 1.6798e-04
Loss = 3.4245e-03, PNorm = 184.0487, GNorm = 0.1359, lr_0 = 1.6787e-04
Loss = 4.4026e-03, PNorm = 184.0508, GNorm = 0.1823, lr_0 = 1.6775e-04
Loss = 3.8883e-03, PNorm = 184.0539, GNorm = 0.0783, lr_0 = 1.6764e-04
Loss = 4.1104e-03, PNorm = 184.0564, GNorm = 0.1064, lr_0 = 1.6752e-04
Loss = 2.4633e-03, PNorm = 184.0603, GNorm = 0.1424, lr_0 = 1.6741e-04
Loss = 3.5007e-03, PNorm = 184.0636, GNorm = 0.1641, lr_0 = 1.6729e-04
Loss = 3.6319e-03, PNorm = 184.0674, GNorm = 0.0695, lr_0 = 1.6718e-04
Loss = 5.8941e-03, PNorm = 184.0709, GNorm = 0.3265, lr_0 = 1.6707e-04
Loss = 4.1036e-03, PNorm = 184.0745, GNorm = 0.0746, lr_0 = 1.6695e-04
Loss = 3.6073e-03, PNorm = 184.0789, GNorm = 0.2052, lr_0 = 1.6684e-04
Loss = 4.1181e-03, PNorm = 184.0823, GNorm = 0.0790, lr_0 = 1.6672e-04
Loss = 5.6506e-03, PNorm = 184.0857, GNorm = 0.2646, lr_0 = 1.6661e-04
Loss = 2.9734e-03, PNorm = 184.0883, GNorm = 0.1134, lr_0 = 1.6649e-04
Loss = 4.1489e-03, PNorm = 184.0911, GNorm = 0.0661, lr_0 = 1.6638e-04
Loss = 3.4105e-03, PNorm = 184.0935, GNorm = 0.1006, lr_0 = 1.6627e-04
Loss = 5.2692e-03, PNorm = 184.0986, GNorm = 0.2209, lr_0 = 1.6615e-04
Loss = 7.3388e-03, PNorm = 184.1014, GNorm = 0.0690, lr_0 = 1.6604e-04
Loss = 4.2638e-03, PNorm = 184.1058, GNorm = 0.1426, lr_0 = 1.6592e-04
Loss = 3.6214e-03, PNorm = 184.1091, GNorm = 0.0851, lr_0 = 1.6581e-04
Loss = 3.5950e-03, PNorm = 184.1104, GNorm = 0.0691, lr_0 = 1.6570e-04
Loss = 2.2731e-03, PNorm = 184.1137, GNorm = 0.1651, lr_0 = 1.6558e-04
Loss = 5.2368e-03, PNorm = 184.1171, GNorm = 0.0673, lr_0 = 1.6547e-04
Loss = 3.7332e-03, PNorm = 184.1210, GNorm = 0.1369, lr_0 = 1.6536e-04
Loss = 4.5712e-03, PNorm = 184.1242, GNorm = 0.1477, lr_0 = 1.6524e-04
Loss = 3.9397e-03, PNorm = 184.1295, GNorm = 0.1015, lr_0 = 1.6513e-04
Loss = 3.9580e-03, PNorm = 184.1332, GNorm = 0.0977, lr_0 = 1.6502e-04
Loss = 4.8195e-03, PNorm = 184.1365, GNorm = 0.2888, lr_0 = 1.6490e-04
Loss = 8.9620e-03, PNorm = 184.1429, GNorm = 0.2167, lr_0 = 1.6479e-04
Loss = 6.4715e-03, PNorm = 184.1452, GNorm = 0.1175, lr_0 = 1.6468e-04
Loss = 4.2239e-03, PNorm = 184.1479, GNorm = 0.0795, lr_0 = 1.6457e-04
Loss = 1.0028e-02, PNorm = 184.1493, GNorm = 0.2622, lr_0 = 1.6445e-04
Loss = 5.1368e-03, PNorm = 184.1518, GNorm = 0.3318, lr_0 = 1.6434e-04
Loss = 2.3472e-03, PNorm = 184.1547, GNorm = 0.1555, lr_0 = 1.6423e-04
Loss = 3.3249e-03, PNorm = 184.1583, GNorm = 0.1934, lr_0 = 1.6412e-04
Loss = 1.0808e-02, PNorm = 184.1611, GNorm = 0.0975, lr_0 = 1.6400e-04
Loss = 5.4334e-03, PNorm = 184.1657, GNorm = 0.1404, lr_0 = 1.6389e-04
Loss = 2.1420e-03, PNorm = 184.1693, GNorm = 0.1028, lr_0 = 1.6378e-04
Validation mae = 0.121226
Epoch 24
Loss = 2.4784e-03, PNorm = 184.1712, GNorm = 0.1195, lr_0 = 1.6367e-04
Loss = 4.9818e-03, PNorm = 184.1746, GNorm = 0.1410, lr_0 = 1.6355e-04
Loss = 2.9485e-03, PNorm = 184.1781, GNorm = 0.1170, lr_0 = 1.6344e-04
Loss = 4.2431e-03, PNorm = 184.1804, GNorm = 0.1269, lr_0 = 1.6333e-04
Loss = 5.4945e-03, PNorm = 184.1826, GNorm = 0.1200, lr_0 = 1.6322e-04
Loss = 3.7592e-03, PNorm = 184.1848, GNorm = 0.1362, lr_0 = 1.6311e-04
Loss = 5.1541e-03, PNorm = 184.1865, GNorm = 0.1185, lr_0 = 1.6299e-04
Loss = 2.7421e-03, PNorm = 184.1888, GNorm = 0.1484, lr_0 = 1.6288e-04
Loss = 6.6071e-03, PNorm = 184.1908, GNorm = 0.0456, lr_0 = 1.6277e-04
Loss = 3.5604e-03, PNorm = 184.1920, GNorm = 0.1994, lr_0 = 1.6266e-04
Loss = 2.3814e-03, PNorm = 184.1940, GNorm = 0.2249, lr_0 = 1.6255e-04
Loss = 3.9843e-03, PNorm = 184.1969, GNorm = 0.0591, lr_0 = 1.6244e-04
Loss = 4.3373e-03, PNorm = 184.2000, GNorm = 0.1044, lr_0 = 1.6233e-04
Loss = 4.5571e-03, PNorm = 184.2019, GNorm = 0.6331, lr_0 = 1.6221e-04
Loss = 6.0167e-03, PNorm = 184.2049, GNorm = 0.0811, lr_0 = 1.6210e-04
Loss = 2.6842e-03, PNorm = 184.2062, GNorm = 0.0934, lr_0 = 1.6199e-04
Loss = 4.4985e-03, PNorm = 184.2090, GNorm = 0.1347, lr_0 = 1.6188e-04
Loss = 1.2146e-02, PNorm = 184.2138, GNorm = 0.3692, lr_0 = 1.6177e-04
Loss = 2.9981e-03, PNorm = 184.2164, GNorm = 0.1956, lr_0 = 1.6166e-04
Loss = 3.4951e-03, PNorm = 184.2169, GNorm = 0.1609, lr_0 = 1.6155e-04
Loss = 2.5647e-03, PNorm = 184.2177, GNorm = 0.1653, lr_0 = 1.6144e-04
Loss = 5.7051e-03, PNorm = 184.2194, GNorm = 0.1083, lr_0 = 1.6133e-04
Loss = 3.8578e-03, PNorm = 184.2228, GNorm = 0.0699, lr_0 = 1.6122e-04
Loss = 6.0683e-03, PNorm = 184.2238, GNorm = 0.1712, lr_0 = 1.6111e-04
Loss = 3.1126e-03, PNorm = 184.2269, GNorm = 0.2941, lr_0 = 1.6100e-04
Loss = 3.0097e-03, PNorm = 184.2304, GNorm = 0.1819, lr_0 = 1.6089e-04
Loss = 7.6631e-03, PNorm = 184.2344, GNorm = 0.2252, lr_0 = 1.6078e-04
Loss = 2.3316e-03, PNorm = 184.2376, GNorm = 0.0730, lr_0 = 1.6067e-04
Loss = 1.7875e-03, PNorm = 184.2409, GNorm = 0.1765, lr_0 = 1.6056e-04
Loss = 2.5207e-03, PNorm = 184.2435, GNorm = 0.0461, lr_0 = 1.6045e-04
Loss = 2.4973e-03, PNorm = 184.2453, GNorm = 0.1162, lr_0 = 1.6034e-04
Loss = 2.0114e-03, PNorm = 184.2471, GNorm = 0.0837, lr_0 = 1.6023e-04
Loss = 2.2025e-03, PNorm = 184.2485, GNorm = 0.1032, lr_0 = 1.6012e-04
Loss = 6.9768e-03, PNorm = 184.2485, GNorm = 0.2460, lr_0 = 1.6001e-04
Loss = 5.8706e-03, PNorm = 184.2499, GNorm = 0.3283, lr_0 = 1.5990e-04
Loss = 2.0782e-03, PNorm = 184.2533, GNorm = 0.0693, lr_0 = 1.5979e-04
Loss = 2.9164e-03, PNorm = 184.2557, GNorm = 0.1045, lr_0 = 1.5968e-04
Loss = 5.3436e-03, PNorm = 184.2610, GNorm = 0.1806, lr_0 = 1.5957e-04
Loss = 3.4557e-03, PNorm = 184.2644, GNorm = 0.2840, lr_0 = 1.5946e-04
Loss = 4.0851e-03, PNorm = 184.2681, GNorm = 0.1196, lr_0 = 1.5935e-04
Loss = 7.4229e-03, PNorm = 184.2711, GNorm = 0.2154, lr_0 = 1.5924e-04
Loss = 2.9172e-03, PNorm = 184.2736, GNorm = 0.1050, lr_0 = 1.5913e-04
Loss = 2.8470e-03, PNorm = 184.2761, GNorm = 0.0680, lr_0 = 1.5902e-04
Loss = 7.3740e-03, PNorm = 184.2783, GNorm = 0.0440, lr_0 = 1.5891e-04
Loss = 3.2763e-03, PNorm = 184.2806, GNorm = 0.1310, lr_0 = 1.5880e-04
Loss = 4.1117e-03, PNorm = 184.2833, GNorm = 0.0838, lr_0 = 1.5870e-04
Loss = 3.4797e-03, PNorm = 184.2848, GNorm = 0.1385, lr_0 = 1.5859e-04
Loss = 2.5659e-03, PNorm = 184.2860, GNorm = 0.1134, lr_0 = 1.5848e-04
Loss = 3.2482e-03, PNorm = 184.2884, GNorm = 0.1505, lr_0 = 1.5837e-04
Loss = 6.8964e-03, PNorm = 184.2918, GNorm = 0.1237, lr_0 = 1.5826e-04
Loss = 4.6362e-03, PNorm = 184.2938, GNorm = 0.1067, lr_0 = 1.5815e-04
Loss = 5.8895e-03, PNorm = 184.2966, GNorm = 0.5884, lr_0 = 1.5804e-04
Loss = 2.2165e-03, PNorm = 184.2998, GNorm = 0.1439, lr_0 = 1.5794e-04
Loss = 2.3980e-03, PNorm = 184.3016, GNorm = 0.1834, lr_0 = 1.5783e-04
Loss = 1.9598e-03, PNorm = 184.3037, GNorm = 0.1059, lr_0 = 1.5772e-04
Loss = 3.8734e-03, PNorm = 184.3054, GNorm = 0.1357, lr_0 = 1.5761e-04
Loss = 6.8894e-03, PNorm = 184.3087, GNorm = 0.1053, lr_0 = 1.5750e-04
Loss = 3.4176e-03, PNorm = 184.3107, GNorm = 0.0852, lr_0 = 1.5740e-04
Loss = 2.7735e-03, PNorm = 184.3132, GNorm = 0.0695, lr_0 = 1.5729e-04
Loss = 2.6425e-03, PNorm = 184.3172, GNorm = 0.0833, lr_0 = 1.5718e-04
Loss = 2.4107e-03, PNorm = 184.3196, GNorm = 0.1685, lr_0 = 1.5707e-04
Loss = 3.0463e-03, PNorm = 184.3209, GNorm = 0.1691, lr_0 = 1.5697e-04
Loss = 2.1345e-03, PNorm = 184.3225, GNorm = 0.1384, lr_0 = 1.5686e-04
Loss = 1.9915e-03, PNorm = 184.3253, GNorm = 0.0499, lr_0 = 1.5675e-04
Loss = 3.2946e-03, PNorm = 184.3283, GNorm = 0.2075, lr_0 = 1.5664e-04
Loss = 2.4495e-03, PNorm = 184.3309, GNorm = 0.1586, lr_0 = 1.5654e-04
Loss = 5.6604e-03, PNorm = 184.3341, GNorm = 0.1087, lr_0 = 1.5643e-04
Loss = 4.8751e-03, PNorm = 184.3382, GNorm = 0.2547, lr_0 = 1.5632e-04
Loss = 3.5908e-03, PNorm = 184.3404, GNorm = 0.0931, lr_0 = 1.5621e-04
Loss = 2.4798e-03, PNorm = 184.3438, GNorm = 0.1110, lr_0 = 1.5611e-04
Loss = 1.9924e-03, PNorm = 184.3472, GNorm = 0.0550, lr_0 = 1.5600e-04
Loss = 2.5825e-03, PNorm = 184.3496, GNorm = 0.1012, lr_0 = 1.5589e-04
Loss = 7.9660e-03, PNorm = 184.3518, GNorm = 0.1595, lr_0 = 1.5579e-04
Loss = 1.9947e-03, PNorm = 184.3551, GNorm = 0.0562, lr_0 = 1.5568e-04
Loss = 2.2942e-03, PNorm = 184.3576, GNorm = 0.2770, lr_0 = 1.5557e-04
Loss = 1.9650e-03, PNorm = 184.3591, GNorm = 0.0665, lr_0 = 1.5547e-04
Loss = 4.1814e-03, PNorm = 184.3610, GNorm = 0.2617, lr_0 = 1.5536e-04
Loss = 1.7772e-03, PNorm = 184.3646, GNorm = 0.0653, lr_0 = 1.5525e-04
Loss = 3.1416e-03, PNorm = 184.3682, GNorm = 0.0578, lr_0 = 1.5515e-04
Loss = 3.8360e-03, PNorm = 184.3706, GNorm = 0.0898, lr_0 = 1.5504e-04
Loss = 7.3011e-03, PNorm = 184.3730, GNorm = 0.1066, lr_0 = 1.5493e-04
Loss = 1.0092e-02, PNorm = 184.3750, GNorm = 0.1053, lr_0 = 1.5483e-04
Loss = 3.5000e-03, PNorm = 184.3765, GNorm = 0.1400, lr_0 = 1.5472e-04
Loss = 1.0269e-02, PNorm = 184.3777, GNorm = 0.2949, lr_0 = 1.5462e-04
Loss = 4.0129e-03, PNorm = 184.3794, GNorm = 0.1823, lr_0 = 1.5451e-04
Loss = 4.7234e-03, PNorm = 184.3811, GNorm = 0.0938, lr_0 = 1.5440e-04
Loss = 5.6187e-03, PNorm = 184.3835, GNorm = 0.1519, lr_0 = 1.5430e-04
Loss = 2.3223e-03, PNorm = 184.3871, GNorm = 0.1231, lr_0 = 1.5419e-04
Loss = 1.7098e-03, PNorm = 184.3892, GNorm = 0.1269, lr_0 = 1.5409e-04
Loss = 4.3747e-03, PNorm = 184.3925, GNorm = 0.1824, lr_0 = 1.5398e-04
Loss = 2.9464e-03, PNorm = 184.3961, GNorm = 0.0738, lr_0 = 1.5388e-04
Loss = 6.2453e-03, PNorm = 184.3983, GNorm = 0.0757, lr_0 = 1.5377e-04
Loss = 2.3245e-03, PNorm = 184.4001, GNorm = 0.0892, lr_0 = 1.5367e-04
Loss = 2.8370e-03, PNorm = 184.4006, GNorm = 0.1616, lr_0 = 1.5356e-04
Loss = 6.7489e-03, PNorm = 184.4016, GNorm = 0.1028, lr_0 = 1.5346e-04
Loss = 2.9804e-03, PNorm = 184.4042, GNorm = 0.0538, lr_0 = 1.5335e-04
Loss = 3.9266e-03, PNorm = 184.4083, GNorm = 0.1481, lr_0 = 1.5325e-04
Loss = 2.5270e-03, PNorm = 184.4120, GNorm = 0.0946, lr_0 = 1.5314e-04
Loss = 2.6882e-03, PNorm = 184.4156, GNorm = 0.1781, lr_0 = 1.5304e-04
Loss = 3.4051e-03, PNorm = 184.4175, GNorm = 0.1366, lr_0 = 1.5293e-04
Loss = 3.1244e-03, PNorm = 184.4219, GNorm = 0.1503, lr_0 = 1.5283e-04
Loss = 3.0412e-03, PNorm = 184.4238, GNorm = 0.1170, lr_0 = 1.5272e-04
Loss = 2.2582e-03, PNorm = 184.4263, GNorm = 0.0702, lr_0 = 1.5262e-04
Loss = 2.9297e-03, PNorm = 184.4295, GNorm = 0.1058, lr_0 = 1.5251e-04
Loss = 3.4946e-03, PNorm = 184.4335, GNorm = 0.0655, lr_0 = 1.5241e-04
Loss = 4.1535e-03, PNorm = 184.4353, GNorm = 0.2387, lr_0 = 1.5230e-04
Loss = 3.9819e-03, PNorm = 184.4377, GNorm = 0.1254, lr_0 = 1.5220e-04
Loss = 4.1059e-03, PNorm = 184.4409, GNorm = 0.2707, lr_0 = 1.5209e-04
Loss = 7.1786e-03, PNorm = 184.4430, GNorm = 0.1078, lr_0 = 1.5199e-04
Loss = 3.7913e-03, PNorm = 184.4454, GNorm = 0.1882, lr_0 = 1.5189e-04
Loss = 8.2746e-03, PNorm = 184.4464, GNorm = 0.1389, lr_0 = 1.5178e-04
Loss = 5.8172e-03, PNorm = 184.4495, GNorm = 0.0601, lr_0 = 1.5168e-04
Loss = 5.1733e-03, PNorm = 184.4509, GNorm = 0.0996, lr_0 = 1.5157e-04
Loss = 5.3210e-03, PNorm = 184.4522, GNorm = 0.0886, lr_0 = 1.5147e-04
Loss = 7.3059e-03, PNorm = 184.4567, GNorm = 0.2217, lr_0 = 1.5137e-04
Loss = 5.0489e-03, PNorm = 184.4615, GNorm = 0.0428, lr_0 = 1.5126e-04
Loss = 8.2630e-03, PNorm = 184.4644, GNorm = 0.1082, lr_0 = 1.5116e-04
Loss = 8.5828e-03, PNorm = 184.4667, GNorm = 0.3482, lr_0 = 1.5106e-04
Loss = 5.7588e-03, PNorm = 184.4692, GNorm = 0.0750, lr_0 = 1.5095e-04
Loss = 2.8474e-03, PNorm = 184.4742, GNorm = 0.4873, lr_0 = 1.5085e-04
Validation mae = 0.121277
Epoch 25
Loss = 2.5630e-03, PNorm = 184.4782, GNorm = 0.0598, lr_0 = 1.5075e-04
Loss = 1.8355e-03, PNorm = 184.4802, GNorm = 0.0776, lr_0 = 1.5064e-04
Loss = 3.4954e-03, PNorm = 184.4813, GNorm = 0.0933, lr_0 = 1.5054e-04
Loss = 2.6912e-03, PNorm = 184.4823, GNorm = 0.1953, lr_0 = 1.5044e-04
Loss = 2.8603e-03, PNorm = 184.4839, GNorm = 0.0757, lr_0 = 1.5033e-04
Loss = 5.0736e-03, PNorm = 184.4853, GNorm = 0.2357, lr_0 = 1.5023e-04
Loss = 5.1569e-03, PNorm = 184.4875, GNorm = 0.1579, lr_0 = 1.5013e-04
Loss = 3.8915e-03, PNorm = 184.4909, GNorm = 1.0620, lr_0 = 1.5002e-04
Loss = 1.8480e-03, PNorm = 184.4925, GNorm = 0.0972, lr_0 = 1.4992e-04
Loss = 6.7690e-03, PNorm = 184.4952, GNorm = 0.0663, lr_0 = 1.4982e-04
Loss = 5.7265e-03, PNorm = 184.4979, GNorm = 0.1416, lr_0 = 1.4972e-04
Loss = 1.5582e-02, PNorm = 184.5012, GNorm = 0.2671, lr_0 = 1.4961e-04
Loss = 6.1236e-03, PNorm = 184.5016, GNorm = 0.1798, lr_0 = 1.4951e-04
Loss = 4.0068e-03, PNorm = 184.5060, GNorm = 0.1056, lr_0 = 1.4941e-04
Loss = 3.1894e-03, PNorm = 184.5095, GNorm = 0.0672, lr_0 = 1.4931e-04
Loss = 2.5363e-03, PNorm = 184.5116, GNorm = 0.1730, lr_0 = 1.4920e-04
Loss = 3.4932e-03, PNorm = 184.5140, GNorm = 0.1589, lr_0 = 1.4910e-04
Loss = 4.8825e-03, PNorm = 184.5161, GNorm = 0.0743, lr_0 = 1.4900e-04
Loss = 2.2370e-03, PNorm = 184.5173, GNorm = 0.0407, lr_0 = 1.4890e-04
Loss = 2.4512e-03, PNorm = 184.5190, GNorm = 0.1297, lr_0 = 1.4880e-04
Loss = 4.4151e-03, PNorm = 184.5212, GNorm = 0.1122, lr_0 = 1.4869e-04
Loss = 6.3751e-03, PNorm = 184.5230, GNorm = 0.1011, lr_0 = 1.4859e-04
Loss = 2.3488e-03, PNorm = 184.5264, GNorm = 0.1027, lr_0 = 1.4849e-04
Loss = 7.5375e-03, PNorm = 184.5293, GNorm = 0.1449, lr_0 = 1.4839e-04
Loss = 2.8672e-03, PNorm = 184.5317, GNorm = 0.1321, lr_0 = 1.4829e-04
Loss = 4.3997e-03, PNorm = 184.5347, GNorm = 0.1077, lr_0 = 1.4818e-04
Loss = 2.9316e-03, PNorm = 184.5372, GNorm = 0.0924, lr_0 = 1.4808e-04
Loss = 2.1409e-03, PNorm = 184.5380, GNorm = 0.0508, lr_0 = 1.4798e-04
Loss = 4.8980e-03, PNorm = 184.5397, GNorm = 0.1256, lr_0 = 1.4788e-04
Loss = 2.0168e-03, PNorm = 184.5422, GNorm = 0.0663, lr_0 = 1.4778e-04
Loss = 4.5830e-03, PNorm = 184.5443, GNorm = 0.2384, lr_0 = 1.4768e-04
Loss = 2.2138e-03, PNorm = 184.5462, GNorm = 0.0979, lr_0 = 1.4758e-04
Loss = 2.0533e-03, PNorm = 184.5486, GNorm = 0.1376, lr_0 = 1.4748e-04
Loss = 2.1021e-03, PNorm = 184.5518, GNorm = 0.0495, lr_0 = 1.4737e-04
Loss = 3.7370e-03, PNorm = 184.5530, GNorm = 0.0966, lr_0 = 1.4727e-04
Loss = 2.5965e-03, PNorm = 184.5564, GNorm = 0.0773, lr_0 = 1.4717e-04
Loss = 2.6488e-03, PNorm = 184.5598, GNorm = 0.1501, lr_0 = 1.4707e-04
Loss = 4.6716e-03, PNorm = 184.5634, GNorm = 0.0868, lr_0 = 1.4697e-04
Loss = 3.7757e-03, PNorm = 184.5664, GNorm = 0.2211, lr_0 = 1.4687e-04
Loss = 2.6048e-03, PNorm = 184.5696, GNorm = 0.1895, lr_0 = 1.4677e-04
Loss = 2.9948e-03, PNorm = 184.5710, GNorm = 0.1520, lr_0 = 1.4667e-04
Loss = 2.7858e-03, PNorm = 184.5736, GNorm = 0.3615, lr_0 = 1.4657e-04
Loss = 3.2901e-03, PNorm = 184.5750, GNorm = 0.1467, lr_0 = 1.4647e-04
Loss = 2.0465e-03, PNorm = 184.5771, GNorm = 0.0688, lr_0 = 1.4637e-04
Loss = 1.8357e-03, PNorm = 184.5799, GNorm = 0.0604, lr_0 = 1.4627e-04
Loss = 7.6428e-03, PNorm = 184.5822, GNorm = 0.0877, lr_0 = 1.4617e-04
Loss = 2.6019e-03, PNorm = 184.5861, GNorm = 0.0571, lr_0 = 1.4607e-04
Loss = 2.0087e-03, PNorm = 184.5890, GNorm = 0.0993, lr_0 = 1.4597e-04
Loss = 1.8026e-03, PNorm = 184.5914, GNorm = 0.0691, lr_0 = 1.4587e-04
Loss = 4.6893e-03, PNorm = 184.5924, GNorm = 0.0938, lr_0 = 1.4577e-04
Loss = 1.5077e-03, PNorm = 184.5938, GNorm = 0.0614, lr_0 = 1.4567e-04
Loss = 2.4471e-03, PNorm = 184.5957, GNorm = 0.0835, lr_0 = 1.4557e-04
Loss = 3.5437e-03, PNorm = 184.5967, GNorm = 0.0893, lr_0 = 1.4547e-04
Loss = 8.1819e-03, PNorm = 184.5996, GNorm = 0.2067, lr_0 = 1.4537e-04
Loss = 1.9551e-03, PNorm = 184.6012, GNorm = 0.0812, lr_0 = 1.4527e-04
Loss = 2.1428e-03, PNorm = 184.6030, GNorm = 0.1284, lr_0 = 1.4517e-04
Loss = 4.5369e-03, PNorm = 184.6053, GNorm = 0.1583, lr_0 = 1.4507e-04
Loss = 4.6834e-03, PNorm = 184.6069, GNorm = 0.0669, lr_0 = 1.4497e-04
Loss = 3.2773e-03, PNorm = 184.6090, GNorm = 0.3222, lr_0 = 1.4487e-04
Loss = 2.2528e-03, PNorm = 184.6126, GNorm = 0.1031, lr_0 = 1.4477e-04
Loss = 2.8669e-03, PNorm = 184.6156, GNorm = 0.1797, lr_0 = 1.4467e-04
Loss = 2.4147e-03, PNorm = 184.6179, GNorm = 0.0842, lr_0 = 1.4457e-04
Loss = 1.8717e-03, PNorm = 184.6198, GNorm = 0.0960, lr_0 = 1.4447e-04
Loss = 2.2519e-03, PNorm = 184.6214, GNorm = 0.0678, lr_0 = 1.4438e-04
Loss = 6.8675e-03, PNorm = 184.6230, GNorm = 1.9801, lr_0 = 1.4428e-04
Loss = 2.0943e-03, PNorm = 184.6246, GNorm = 0.0881, lr_0 = 1.4418e-04
Loss = 2.5780e-03, PNorm = 184.6281, GNorm = 0.1113, lr_0 = 1.4408e-04
Loss = 2.6533e-03, PNorm = 184.6306, GNorm = 0.1337, lr_0 = 1.4398e-04
Loss = 3.8956e-03, PNorm = 184.6344, GNorm = 0.0843, lr_0 = 1.4388e-04
Loss = 1.6734e-03, PNorm = 184.6371, GNorm = 0.1334, lr_0 = 1.4378e-04
Loss = 5.1374e-03, PNorm = 184.6398, GNorm = 0.2706, lr_0 = 1.4368e-04
Loss = 4.2957e-03, PNorm = 184.6436, GNorm = 0.1265, lr_0 = 1.4359e-04
Loss = 4.0488e-03, PNorm = 184.6462, GNorm = 0.1715, lr_0 = 1.4349e-04
Loss = 1.8429e-03, PNorm = 184.6494, GNorm = 0.0760, lr_0 = 1.4339e-04
Loss = 6.3251e-03, PNorm = 184.6514, GNorm = 0.0639, lr_0 = 1.4329e-04
Loss = 3.6571e-03, PNorm = 184.6537, GNorm = 0.1712, lr_0 = 1.4319e-04
Loss = 2.4880e-03, PNorm = 184.6564, GNorm = 0.1241, lr_0 = 1.4310e-04
Loss = 8.8006e-03, PNorm = 184.6577, GNorm = 0.0564, lr_0 = 1.4300e-04
Loss = 5.1071e-03, PNorm = 184.6600, GNorm = 0.1376, lr_0 = 1.4290e-04
Loss = 3.8490e-03, PNorm = 184.6632, GNorm = 0.2377, lr_0 = 1.4280e-04
Loss = 1.8829e-03, PNorm = 184.6657, GNorm = 0.0829, lr_0 = 1.4270e-04
Loss = 2.1406e-03, PNorm = 184.6685, GNorm = 0.1638, lr_0 = 1.4261e-04
Loss = 3.5490e-03, PNorm = 184.6719, GNorm = 0.1614, lr_0 = 1.4251e-04
Loss = 3.9373e-03, PNorm = 184.6743, GNorm = 0.1912, lr_0 = 1.4241e-04
Loss = 1.8460e-03, PNorm = 184.6763, GNorm = 0.3387, lr_0 = 1.4231e-04
Loss = 2.2090e-03, PNorm = 184.6783, GNorm = 0.0817, lr_0 = 1.4222e-04
Loss = 7.1180e-03, PNorm = 184.6810, GNorm = 0.1069, lr_0 = 1.4212e-04
Loss = 3.3909e-03, PNorm = 184.6841, GNorm = 0.0744, lr_0 = 1.4202e-04
Loss = 2.5099e-03, PNorm = 184.6855, GNorm = 0.1001, lr_0 = 1.4192e-04
Loss = 5.8453e-03, PNorm = 184.6868, GNorm = 0.0678, lr_0 = 1.4183e-04
Loss = 7.0838e-03, PNorm = 184.6880, GNorm = 0.0938, lr_0 = 1.4173e-04
Loss = 5.7755e-03, PNorm = 184.6907, GNorm = 0.1837, lr_0 = 1.4163e-04
Loss = 5.7663e-03, PNorm = 184.6913, GNorm = 0.2158, lr_0 = 1.4153e-04
Loss = 7.1229e-03, PNorm = 184.6949, GNorm = 0.1346, lr_0 = 1.4144e-04
Loss = 5.3837e-03, PNorm = 184.6980, GNorm = 0.0992, lr_0 = 1.4134e-04
Loss = 7.2986e-03, PNorm = 184.6995, GNorm = 0.0701, lr_0 = 1.4124e-04
Loss = 4.4364e-03, PNorm = 184.7013, GNorm = 0.0987, lr_0 = 1.4115e-04
Loss = 4.7252e-03, PNorm = 184.7034, GNorm = 0.0894, lr_0 = 1.4105e-04
Loss = 3.1898e-03, PNorm = 184.7065, GNorm = 0.1008, lr_0 = 1.4095e-04
Loss = 3.7885e-03, PNorm = 184.7095, GNorm = 0.1410, lr_0 = 1.4086e-04
Loss = 3.0889e-03, PNorm = 184.7113, GNorm = 0.1941, lr_0 = 1.4076e-04
Loss = 3.0916e-03, PNorm = 184.7131, GNorm = 0.1575, lr_0 = 1.4066e-04
Loss = 1.0104e-02, PNorm = 184.7163, GNorm = 0.0865, lr_0 = 1.4057e-04
Loss = 1.9383e-03, PNorm = 184.7189, GNorm = 0.2023, lr_0 = 1.4047e-04
Loss = 1.9679e-03, PNorm = 184.7209, GNorm = 0.1826, lr_0 = 1.4038e-04
Loss = 4.8207e-03, PNorm = 184.7237, GNorm = 0.0863, lr_0 = 1.4028e-04
Loss = 2.7639e-03, PNorm = 184.7282, GNorm = 0.0608, lr_0 = 1.4018e-04
Loss = 1.9816e-03, PNorm = 184.7321, GNorm = 0.2906, lr_0 = 1.4009e-04
Loss = 5.8383e-03, PNorm = 184.7329, GNorm = 0.5075, lr_0 = 1.3999e-04
Loss = 4.5165e-03, PNorm = 184.7355, GNorm = 0.1847, lr_0 = 1.3990e-04
Loss = 2.5119e-03, PNorm = 184.7388, GNorm = 0.1034, lr_0 = 1.3980e-04
Loss = 2.8857e-03, PNorm = 184.7424, GNorm = 0.0960, lr_0 = 1.3970e-04
Loss = 2.0951e-03, PNorm = 184.7461, GNorm = 0.0548, lr_0 = 1.3961e-04
Loss = 3.8235e-03, PNorm = 184.7474, GNorm = 0.0724, lr_0 = 1.3951e-04
Loss = 2.0762e-03, PNorm = 184.7489, GNorm = 0.0642, lr_0 = 1.3942e-04
Loss = 1.8447e-03, PNorm = 184.7512, GNorm = 0.0583, lr_0 = 1.3932e-04
Loss = 2.4410e-03, PNorm = 184.7535, GNorm = 0.0836, lr_0 = 1.3923e-04
Loss = 7.0045e-03, PNorm = 184.7541, GNorm = 0.1013, lr_0 = 1.3913e-04
Loss = 1.5534e-03, PNorm = 184.7555, GNorm = 0.0558, lr_0 = 1.3904e-04
Loss = 3.3726e-03, PNorm = 184.7576, GNorm = 0.1002, lr_0 = 1.3894e-04
Validation mae = 0.121170
Epoch 26
Loss = 4.2753e-03, PNorm = 184.7599, GNorm = 0.0907, lr_0 = 1.3884e-04
Loss = 6.1934e-03, PNorm = 184.7619, GNorm = 0.0633, lr_0 = 1.3875e-04
Loss = 7.4274e-03, PNorm = 184.7614, GNorm = 0.1528, lr_0 = 1.3865e-04
Loss = 3.5504e-03, PNorm = 184.7609, GNorm = 0.0558, lr_0 = 1.3856e-04
Loss = 2.1165e-03, PNorm = 184.7620, GNorm = 0.0710, lr_0 = 1.3846e-04
Loss = 3.4346e-03, PNorm = 184.7638, GNorm = 0.1939, lr_0 = 1.3837e-04
Loss = 3.2287e-03, PNorm = 184.7654, GNorm = 0.1530, lr_0 = 1.3828e-04
Loss = 1.8014e-03, PNorm = 184.7663, GNorm = 0.1442, lr_0 = 1.3818e-04
Loss = 3.7947e-03, PNorm = 184.7682, GNorm = 0.1260, lr_0 = 1.3809e-04
Loss = 1.5926e-03, PNorm = 184.7693, GNorm = 0.2006, lr_0 = 1.3799e-04
Loss = 3.8043e-03, PNorm = 184.7728, GNorm = 0.0749, lr_0 = 1.3790e-04
Loss = 1.7795e-03, PNorm = 184.7770, GNorm = 0.1289, lr_0 = 1.3780e-04
Loss = 1.7186e-03, PNorm = 184.7797, GNorm = 0.1039, lr_0 = 1.3771e-04
Loss = 5.2044e-03, PNorm = 184.7815, GNorm = 0.1266, lr_0 = 1.3761e-04
Loss = 2.4980e-03, PNorm = 184.7831, GNorm = 0.0995, lr_0 = 1.3752e-04
Loss = 2.3824e-03, PNorm = 184.7846, GNorm = 0.2736, lr_0 = 1.3742e-04
Loss = 1.5504e-03, PNorm = 184.7871, GNorm = 0.1392, lr_0 = 1.3733e-04
Loss = 2.3606e-03, PNorm = 184.7897, GNorm = 0.1105, lr_0 = 1.3724e-04
Loss = 1.3941e-03, PNorm = 184.7912, GNorm = 0.0865, lr_0 = 1.3714e-04
Loss = 1.4535e-03, PNorm = 184.7919, GNorm = 0.0689, lr_0 = 1.3705e-04
Loss = 3.4058e-03, PNorm = 184.7932, GNorm = 0.0593, lr_0 = 1.3695e-04
Loss = 4.5830e-03, PNorm = 184.7940, GNorm = 0.2584, lr_0 = 1.3686e-04
Loss = 1.6773e-03, PNorm = 184.7954, GNorm = 0.0798, lr_0 = 1.3677e-04
Loss = 3.8660e-03, PNorm = 184.7980, GNorm = 0.1614, lr_0 = 1.3667e-04
Loss = 4.3531e-03, PNorm = 184.7994, GNorm = 1.4147, lr_0 = 1.3658e-04
Loss = 5.4828e-03, PNorm = 184.8013, GNorm = 0.1622, lr_0 = 1.3649e-04
Loss = 2.6422e-03, PNorm = 184.8031, GNorm = 0.1172, lr_0 = 1.3639e-04
Loss = 3.1293e-03, PNorm = 184.8046, GNorm = 0.3969, lr_0 = 1.3630e-04
Loss = 4.3030e-03, PNorm = 184.8065, GNorm = 0.0910, lr_0 = 1.3621e-04
Loss = 2.1393e-03, PNorm = 184.8085, GNorm = 0.1160, lr_0 = 1.3611e-04
Loss = 2.5728e-03, PNorm = 184.8099, GNorm = 0.1696, lr_0 = 1.3602e-04
Loss = 1.7875e-03, PNorm = 184.8112, GNorm = 0.0726, lr_0 = 1.3593e-04
Loss = 2.5781e-03, PNorm = 184.8137, GNorm = 0.1291, lr_0 = 1.3583e-04
Loss = 2.8344e-03, PNorm = 184.8163, GNorm = 0.0825, lr_0 = 1.3574e-04
Loss = 2.7760e-03, PNorm = 184.8200, GNorm = 0.1003, lr_0 = 1.3565e-04
Loss = 3.5386e-03, PNorm = 184.8223, GNorm = 0.0811, lr_0 = 1.3555e-04
Loss = 3.6731e-03, PNorm = 184.8238, GNorm = 0.1226, lr_0 = 1.3546e-04
Loss = 3.5839e-03, PNorm = 184.8247, GNorm = 0.0976, lr_0 = 1.3537e-04
Loss = 2.7589e-03, PNorm = 184.8253, GNorm = 0.0708, lr_0 = 1.3528e-04
Loss = 2.5643e-03, PNorm = 184.8271, GNorm = 0.0670, lr_0 = 1.3518e-04
Loss = 6.5940e-03, PNorm = 184.8277, GNorm = 0.3052, lr_0 = 1.3509e-04
Loss = 4.4731e-03, PNorm = 184.8284, GNorm = 0.1145, lr_0 = 1.3500e-04
Loss = 7.5912e-03, PNorm = 184.8318, GNorm = 0.1800, lr_0 = 1.3491e-04
Loss = 5.6184e-03, PNorm = 184.8332, GNorm = 0.1134, lr_0 = 1.3481e-04
Loss = 3.6969e-03, PNorm = 184.8347, GNorm = 0.1425, lr_0 = 1.3472e-04
Loss = 2.9304e-03, PNorm = 184.8380, GNorm = 0.0578, lr_0 = 1.3463e-04
Loss = 4.6813e-03, PNorm = 184.8401, GNorm = 0.0907, lr_0 = 1.3454e-04
Loss = 2.3988e-03, PNorm = 184.8412, GNorm = 0.1258, lr_0 = 1.3444e-04
Loss = 4.8158e-03, PNorm = 184.8437, GNorm = 0.0988, lr_0 = 1.3435e-04
Loss = 2.9029e-03, PNorm = 184.8462, GNorm = 0.0959, lr_0 = 1.3426e-04
Loss = 2.3507e-03, PNorm = 184.8480, GNorm = 0.0883, lr_0 = 1.3417e-04
Loss = 5.1339e-03, PNorm = 184.8511, GNorm = 0.1441, lr_0 = 1.3408e-04
Loss = 4.6831e-03, PNorm = 184.8534, GNorm = 0.3230, lr_0 = 1.3398e-04
Loss = 3.2048e-03, PNorm = 184.8544, GNorm = 0.0747, lr_0 = 1.3389e-04
Loss = 2.2461e-03, PNorm = 184.8549, GNorm = 0.1945, lr_0 = 1.3380e-04
Loss = 5.8004e-03, PNorm = 184.8566, GNorm = 0.1201, lr_0 = 1.3371e-04
Loss = 2.4019e-03, PNorm = 184.8588, GNorm = 0.0724, lr_0 = 1.3362e-04
Loss = 3.3352e-03, PNorm = 184.8608, GNorm = 0.2513, lr_0 = 1.3353e-04
Loss = 1.9598e-03, PNorm = 184.8632, GNorm = 0.0423, lr_0 = 1.3343e-04
Loss = 3.0085e-03, PNorm = 184.8651, GNorm = 0.4109, lr_0 = 1.3334e-04
Loss = 5.6020e-03, PNorm = 184.8660, GNorm = 0.1889, lr_0 = 1.3325e-04
Loss = 3.6373e-03, PNorm = 184.8687, GNorm = 0.0693, lr_0 = 1.3316e-04
Loss = 3.1319e-03, PNorm = 184.8713, GNorm = 0.0696, lr_0 = 1.3307e-04
Loss = 5.3527e-03, PNorm = 184.8731, GNorm = 0.0785, lr_0 = 1.3298e-04
Loss = 2.0380e-03, PNorm = 184.8739, GNorm = 0.0818, lr_0 = 1.3289e-04
Loss = 3.2062e-03, PNorm = 184.8755, GNorm = 0.0829, lr_0 = 1.3280e-04
Loss = 3.3696e-03, PNorm = 184.8771, GNorm = 0.0542, lr_0 = 1.3270e-04
Loss = 2.8394e-03, PNorm = 184.8790, GNorm = 0.1491, lr_0 = 1.3261e-04
Loss = 1.6062e-03, PNorm = 184.8802, GNorm = 0.0492, lr_0 = 1.3252e-04
Loss = 1.5280e-03, PNorm = 184.8804, GNorm = 0.1147, lr_0 = 1.3243e-04
Loss = 3.0116e-03, PNorm = 184.8811, GNorm = 0.1215, lr_0 = 1.3234e-04
Loss = 2.7891e-03, PNorm = 184.8832, GNorm = 0.0726, lr_0 = 1.3225e-04
Loss = 7.5908e-03, PNorm = 184.8859, GNorm = 0.0977, lr_0 = 1.3216e-04
Loss = 3.4861e-03, PNorm = 184.8875, GNorm = 0.2103, lr_0 = 1.3207e-04
Loss = 3.0123e-03, PNorm = 184.8900, GNorm = 0.0501, lr_0 = 1.3198e-04
Loss = 1.7971e-03, PNorm = 184.8922, GNorm = 0.1215, lr_0 = 1.3189e-04
Loss = 2.0946e-03, PNorm = 184.8933, GNorm = 0.1564, lr_0 = 1.3180e-04
Loss = 5.9124e-03, PNorm = 184.8954, GNorm = 0.1568, lr_0 = 1.3171e-04
Loss = 3.2356e-03, PNorm = 184.8978, GNorm = 0.1479, lr_0 = 1.3162e-04
Loss = 1.8458e-03, PNorm = 184.8995, GNorm = 0.1012, lr_0 = 1.3153e-04
Loss = 3.9751e-03, PNorm = 184.9017, GNorm = 0.1149, lr_0 = 1.3144e-04
Loss = 5.5866e-03, PNorm = 184.9034, GNorm = 0.1106, lr_0 = 1.3135e-04
Loss = 3.7974e-03, PNorm = 184.9048, GNorm = 0.0820, lr_0 = 1.3126e-04
Loss = 4.5923e-03, PNorm = 184.9069, GNorm = 0.1093, lr_0 = 1.3117e-04
Loss = 1.3185e-03, PNorm = 184.9084, GNorm = 0.0837, lr_0 = 1.3108e-04
Loss = 1.3429e-03, PNorm = 184.9101, GNorm = 0.1101, lr_0 = 1.3099e-04
Loss = 6.4811e-03, PNorm = 184.9126, GNorm = 0.1358, lr_0 = 1.3090e-04
Loss = 3.5331e-03, PNorm = 184.9147, GNorm = 0.1530, lr_0 = 1.3081e-04
Loss = 4.4586e-03, PNorm = 184.9176, GNorm = 0.2486, lr_0 = 1.3072e-04
Loss = 3.2543e-03, PNorm = 184.9196, GNorm = 0.0776, lr_0 = 1.3063e-04
Loss = 3.6497e-03, PNorm = 184.9200, GNorm = 0.1968, lr_0 = 1.3054e-04
Loss = 2.9817e-03, PNorm = 184.9199, GNorm = 0.2193, lr_0 = 1.3045e-04
Loss = 2.4807e-03, PNorm = 184.9219, GNorm = 0.0961, lr_0 = 1.3036e-04
Loss = 1.6005e-03, PNorm = 184.9249, GNorm = 0.0647, lr_0 = 1.3027e-04
Loss = 1.6308e-03, PNorm = 184.9294, GNorm = 0.0692, lr_0 = 1.3018e-04
Loss = 4.3528e-03, PNorm = 184.9326, GNorm = 0.1843, lr_0 = 1.3009e-04
Loss = 5.5453e-03, PNorm = 184.9352, GNorm = 0.1091, lr_0 = 1.3000e-04
Loss = 3.7589e-03, PNorm = 184.9375, GNorm = 0.0556, lr_0 = 1.2992e-04
Loss = 1.6757e-03, PNorm = 184.9396, GNorm = 0.1416, lr_0 = 1.2983e-04
Loss = 4.5089e-03, PNorm = 184.9408, GNorm = 0.1343, lr_0 = 1.2974e-04
Loss = 2.5338e-03, PNorm = 184.9436, GNorm = 0.0439, lr_0 = 1.2965e-04
Loss = 1.2232e-03, PNorm = 184.9463, GNorm = 0.0496, lr_0 = 1.2956e-04
Loss = 2.4903e-03, PNorm = 184.9486, GNorm = 0.2073, lr_0 = 1.2947e-04
Loss = 2.6953e-03, PNorm = 184.9522, GNorm = 0.0467, lr_0 = 1.2938e-04
Loss = 4.2976e-03, PNorm = 184.9551, GNorm = 0.1749, lr_0 = 1.2929e-04
Loss = 7.3227e-03, PNorm = 184.9562, GNorm = 0.1152, lr_0 = 1.2921e-04
Loss = 1.8005e-02, PNorm = 184.9595, GNorm = 0.4639, lr_0 = 1.2912e-04
Loss = 4.7319e-03, PNorm = 184.9623, GNorm = 0.0495, lr_0 = 1.2903e-04
Loss = 6.2111e-03, PNorm = 184.9639, GNorm = 0.1379, lr_0 = 1.2894e-04
Loss = 3.4834e-03, PNorm = 184.9659, GNorm = 0.1394, lr_0 = 1.2885e-04
Loss = 4.5647e-03, PNorm = 184.9673, GNorm = 0.1575, lr_0 = 1.2876e-04
Loss = 1.7412e-03, PNorm = 184.9678, GNorm = 0.1497, lr_0 = 1.2867e-04
Loss = 2.6093e-03, PNorm = 184.9703, GNorm = 0.1475, lr_0 = 1.2859e-04
Loss = 2.3108e-03, PNorm = 184.9719, GNorm = 0.1045, lr_0 = 1.2850e-04
Loss = 5.2225e-03, PNorm = 184.9723, GNorm = 0.0978, lr_0 = 1.2841e-04
Loss = 3.4898e-03, PNorm = 184.9735, GNorm = 0.1176, lr_0 = 1.2832e-04
Loss = 1.9959e-03, PNorm = 184.9762, GNorm = 0.1062, lr_0 = 1.2823e-04
Loss = 2.0521e-03, PNorm = 184.9788, GNorm = 0.0935, lr_0 = 1.2815e-04
Loss = 4.9439e-03, PNorm = 184.9813, GNorm = 0.1356, lr_0 = 1.2806e-04
Loss = 2.3808e-03, PNorm = 184.9829, GNorm = 0.0835, lr_0 = 1.2797e-04
Validation mae = 0.121240
Epoch 27
Loss = 2.8113e-03, PNorm = 184.9843, GNorm = 0.1440, lr_0 = 1.2788e-04
Loss = 4.2144e-03, PNorm = 184.9859, GNorm = 0.0837, lr_0 = 1.2780e-04
Loss = 3.2480e-03, PNorm = 184.9874, GNorm = 0.0704, lr_0 = 1.2771e-04
Loss = 2.4972e-03, PNorm = 184.9878, GNorm = 0.0895, lr_0 = 1.2762e-04
Loss = 1.4084e-03, PNorm = 184.9895, GNorm = 0.1019, lr_0 = 1.2753e-04
Loss = 3.7096e-03, PNorm = 184.9928, GNorm = 0.0347, lr_0 = 1.2745e-04
Loss = 2.7300e-03, PNorm = 184.9939, GNorm = 0.1338, lr_0 = 1.2736e-04
Loss = 3.7681e-03, PNorm = 184.9952, GNorm = 0.1203, lr_0 = 1.2727e-04
Loss = 2.3975e-03, PNorm = 184.9959, GNorm = 0.0723, lr_0 = 1.2718e-04
Loss = 2.7378e-03, PNorm = 184.9970, GNorm = 0.3139, lr_0 = 1.2710e-04
Loss = 2.0239e-03, PNorm = 184.9970, GNorm = 0.0907, lr_0 = 1.2701e-04
Loss = 2.5293e-03, PNorm = 184.9989, GNorm = 0.1210, lr_0 = 1.2692e-04
Loss = 4.6370e-03, PNorm = 185.0007, GNorm = 0.1410, lr_0 = 1.2684e-04
Loss = 1.8266e-03, PNorm = 185.0014, GNorm = 0.1297, lr_0 = 1.2675e-04
Loss = 1.5566e-03, PNorm = 185.0030, GNorm = 0.1144, lr_0 = 1.2666e-04
Loss = 2.7130e-03, PNorm = 185.0050, GNorm = 0.2764, lr_0 = 1.2658e-04
Loss = 1.4564e-03, PNorm = 185.0050, GNorm = 0.0949, lr_0 = 1.2649e-04
Loss = 2.3295e-03, PNorm = 185.0047, GNorm = 0.0862, lr_0 = 1.2640e-04
Loss = 2.7357e-03, PNorm = 185.0062, GNorm = 0.0621, lr_0 = 1.2632e-04
Loss = 2.2624e-03, PNorm = 185.0084, GNorm = 0.0710, lr_0 = 1.2623e-04
Loss = 4.3386e-03, PNorm = 185.0102, GNorm = 0.0418, lr_0 = 1.2614e-04
Loss = 2.1644e-03, PNorm = 185.0128, GNorm = 0.1553, lr_0 = 1.2606e-04
Loss = 3.0271e-03, PNorm = 185.0139, GNorm = 0.0741, lr_0 = 1.2597e-04
Loss = 1.7399e-03, PNorm = 185.0146, GNorm = 0.0536, lr_0 = 1.2588e-04
Loss = 5.4288e-03, PNorm = 185.0155, GNorm = 0.1016, lr_0 = 1.2580e-04
Loss = 1.6227e-03, PNorm = 185.0171, GNorm = 0.0730, lr_0 = 1.2571e-04
Loss = 1.3170e-02, PNorm = 185.0177, GNorm = 0.6744, lr_0 = 1.2563e-04
Loss = 5.3496e-03, PNorm = 185.0191, GNorm = 0.1365, lr_0 = 1.2554e-04
Loss = 3.5784e-03, PNorm = 185.0215, GNorm = 0.2013, lr_0 = 1.2545e-04
Loss = 1.5502e-03, PNorm = 185.0244, GNorm = 0.1197, lr_0 = 1.2537e-04
Loss = 8.5455e-03, PNorm = 185.0252, GNorm = 0.1014, lr_0 = 1.2528e-04
Loss = 3.3311e-03, PNorm = 185.0262, GNorm = 0.3133, lr_0 = 1.2520e-04
Loss = 4.7221e-03, PNorm = 185.0297, GNorm = 0.1456, lr_0 = 1.2511e-04
Loss = 2.5476e-03, PNorm = 185.0332, GNorm = 0.0853, lr_0 = 1.2502e-04
Loss = 1.6539e-03, PNorm = 185.0366, GNorm = 0.2732, lr_0 = 1.2494e-04
Loss = 4.7571e-03, PNorm = 185.0399, GNorm = 0.2200, lr_0 = 1.2485e-04
Loss = 1.7955e-03, PNorm = 185.0439, GNorm = 0.1074, lr_0 = 1.2477e-04
Loss = 5.0464e-03, PNorm = 185.0467, GNorm = 0.1091, lr_0 = 1.2468e-04
Loss = 2.6971e-03, PNorm = 185.0503, GNorm = 0.0979, lr_0 = 1.2460e-04
Loss = 1.5046e-03, PNorm = 185.0530, GNorm = 0.1030, lr_0 = 1.2451e-04
Loss = 1.3659e-03, PNorm = 185.0550, GNorm = 0.0645, lr_0 = 1.2443e-04
Loss = 2.7774e-03, PNorm = 185.0570, GNorm = 0.0628, lr_0 = 1.2434e-04
Loss = 5.8937e-03, PNorm = 185.0581, GNorm = 0.0608, lr_0 = 1.2426e-04
Loss = 1.3266e-03, PNorm = 185.0602, GNorm = 0.0608, lr_0 = 1.2417e-04
Loss = 1.4274e-03, PNorm = 185.0611, GNorm = 0.1787, lr_0 = 1.2409e-04
Loss = 1.1807e-03, PNorm = 185.0619, GNorm = 0.0465, lr_0 = 1.2400e-04
Loss = 3.1667e-03, PNorm = 185.0622, GNorm = 0.0709, lr_0 = 1.2392e-04
Loss = 2.2674e-03, PNorm = 185.0638, GNorm = 0.1338, lr_0 = 1.2383e-04
Loss = 1.8052e-03, PNorm = 185.0644, GNorm = 0.0786, lr_0 = 1.2375e-04
Loss = 3.2880e-03, PNorm = 185.0650, GNorm = 0.1022, lr_0 = 1.2366e-04
Loss = 1.3932e-02, PNorm = 185.0682, GNorm = 0.1322, lr_0 = 1.2358e-04
Loss = 3.9437e-03, PNorm = 185.0693, GNorm = 0.1057, lr_0 = 1.2349e-04
Loss = 4.1108e-03, PNorm = 185.0710, GNorm = 1.3617, lr_0 = 1.2341e-04
Loss = 4.4547e-03, PNorm = 185.0725, GNorm = 0.0518, lr_0 = 1.2332e-04
Loss = 4.7723e-03, PNorm = 185.0757, GNorm = 0.1873, lr_0 = 1.2324e-04
Loss = 2.6684e-03, PNorm = 185.0767, GNorm = 0.0935, lr_0 = 1.2315e-04
Loss = 3.8651e-03, PNorm = 185.0776, GNorm = 0.1768, lr_0 = 1.2307e-04
Loss = 8.2013e-03, PNorm = 185.0797, GNorm = 0.0726, lr_0 = 1.2298e-04
Loss = 3.5737e-03, PNorm = 185.0824, GNorm = 0.0453, lr_0 = 1.2290e-04
Loss = 2.9687e-03, PNorm = 185.0847, GNorm = 0.1013, lr_0 = 1.2282e-04
Loss = 2.5300e-03, PNorm = 185.0874, GNorm = 0.1839, lr_0 = 1.2273e-04
Loss = 3.6930e-03, PNorm = 185.0903, GNorm = 0.1284, lr_0 = 1.2265e-04
Loss = 1.6377e-03, PNorm = 185.0900, GNorm = 0.0673, lr_0 = 1.2256e-04
Loss = 2.6268e-03, PNorm = 185.0911, GNorm = 0.0867, lr_0 = 1.2248e-04
Loss = 2.2371e-03, PNorm = 185.0925, GNorm = 0.1203, lr_0 = 1.2240e-04
Loss = 2.4610e-03, PNorm = 185.0943, GNorm = 0.0420, lr_0 = 1.2231e-04
Loss = 6.4270e-03, PNorm = 185.0960, GNorm = 0.1923, lr_0 = 1.2223e-04
Loss = 2.7521e-03, PNorm = 185.0977, GNorm = 0.1173, lr_0 = 1.2214e-04
Loss = 6.9836e-03, PNorm = 185.0992, GNorm = 0.0712, lr_0 = 1.2206e-04
Loss = 1.8069e-03, PNorm = 185.0999, GNorm = 0.0921, lr_0 = 1.2198e-04
Loss = 2.3883e-03, PNorm = 185.1015, GNorm = 0.1913, lr_0 = 1.2189e-04
Loss = 1.6381e-03, PNorm = 185.1025, GNorm = 0.0822, lr_0 = 1.2181e-04
Loss = 2.8873e-03, PNorm = 185.1038, GNorm = 0.0831, lr_0 = 1.2173e-04
Loss = 3.4977e-03, PNorm = 185.1053, GNorm = 0.1867, lr_0 = 1.2164e-04
Loss = 1.4455e-03, PNorm = 185.1086, GNorm = 0.0786, lr_0 = 1.2156e-04
Loss = 2.4247e-03, PNorm = 185.1103, GNorm = 0.1524, lr_0 = 1.2148e-04
Loss = 1.3299e-03, PNorm = 185.1099, GNorm = 0.0730, lr_0 = 1.2139e-04
Loss = 3.9271e-03, PNorm = 185.1116, GNorm = 0.1880, lr_0 = 1.2131e-04
Loss = 5.3981e-03, PNorm = 185.1121, GNorm = 0.0967, lr_0 = 1.2123e-04
Loss = 2.8053e-03, PNorm = 185.1133, GNorm = 0.0754, lr_0 = 1.2114e-04
Loss = 1.3603e-03, PNorm = 185.1137, GNorm = 0.1064, lr_0 = 1.2106e-04
Loss = 2.0702e-03, PNorm = 185.1140, GNorm = 0.2175, lr_0 = 1.2098e-04
Loss = 3.8755e-03, PNorm = 185.1161, GNorm = 0.1001, lr_0 = 1.2090e-04
Loss = 2.2867e-03, PNorm = 185.1177, GNorm = 0.0286, lr_0 = 1.2081e-04
Loss = 3.7323e-03, PNorm = 185.1187, GNorm = 0.1757, lr_0 = 1.2073e-04
Loss = 5.0947e-03, PNorm = 185.1197, GNorm = 0.0514, lr_0 = 1.2065e-04
Loss = 5.3405e-03, PNorm = 185.1211, GNorm = 0.6889, lr_0 = 1.2056e-04
Loss = 4.2954e-03, PNorm = 185.1224, GNorm = 0.3035, lr_0 = 1.2048e-04
Loss = 2.0997e-03, PNorm = 185.1248, GNorm = 0.1065, lr_0 = 1.2040e-04
Loss = 5.2363e-03, PNorm = 185.1252, GNorm = 0.1044, lr_0 = 1.2032e-04
Loss = 7.0777e-03, PNorm = 185.1258, GNorm = 0.5288, lr_0 = 1.2023e-04
Loss = 3.2079e-03, PNorm = 185.1258, GNorm = 0.1704, lr_0 = 1.2015e-04
Loss = 1.4913e-03, PNorm = 185.1280, GNorm = 0.0678, lr_0 = 1.2007e-04
Loss = 1.5057e-03, PNorm = 185.1303, GNorm = 0.0467, lr_0 = 1.1999e-04
Loss = 3.1079e-03, PNorm = 185.1314, GNorm = 0.0460, lr_0 = 1.1991e-04
Loss = 3.3561e-03, PNorm = 185.1328, GNorm = 0.0781, lr_0 = 1.1982e-04
Loss = 1.6495e-03, PNorm = 185.1354, GNorm = 0.1906, lr_0 = 1.1974e-04
Loss = 1.0755e-03, PNorm = 185.1373, GNorm = 0.1047, lr_0 = 1.1966e-04
Loss = 6.9582e-03, PNorm = 185.1392, GNorm = 0.0716, lr_0 = 1.1958e-04
Loss = 1.4521e-03, PNorm = 185.1405, GNorm = 0.1026, lr_0 = 1.1950e-04
Loss = 5.0171e-03, PNorm = 185.1422, GNorm = 0.0689, lr_0 = 1.1941e-04
Loss = 1.4489e-03, PNorm = 185.1456, GNorm = 0.0807, lr_0 = 1.1933e-04
Loss = 1.5914e-03, PNorm = 185.1500, GNorm = 0.1232, lr_0 = 1.1925e-04
Loss = 6.1804e-03, PNorm = 185.1512, GNorm = 0.1146, lr_0 = 1.1917e-04
Loss = 5.8818e-03, PNorm = 185.1517, GNorm = 0.1261, lr_0 = 1.1909e-04
Loss = 1.1359e-02, PNorm = 185.1542, GNorm = 0.0576, lr_0 = 1.1901e-04
Loss = 2.3752e-03, PNorm = 185.1566, GNorm = 0.1814, lr_0 = 1.1892e-04
Loss = 1.3294e-03, PNorm = 185.1580, GNorm = 0.0558, lr_0 = 1.1884e-04
Loss = 2.8550e-03, PNorm = 185.1594, GNorm = 0.1263, lr_0 = 1.1876e-04
Loss = 1.0994e-03, PNorm = 185.1607, GNorm = 0.0482, lr_0 = 1.1868e-04
Loss = 3.1132e-03, PNorm = 185.1620, GNorm = 0.1843, lr_0 = 1.1860e-04
Loss = 2.5591e-03, PNorm = 185.1631, GNorm = 0.1204, lr_0 = 1.1852e-04
Loss = 3.6935e-03, PNorm = 185.1633, GNorm = 0.0592, lr_0 = 1.1844e-04
Loss = 1.6917e-03, PNorm = 185.1651, GNorm = 0.0619, lr_0 = 1.1835e-04
Loss = 9.6557e-04, PNorm = 185.1666, GNorm = 0.0637, lr_0 = 1.1827e-04
Loss = 1.3188e-03, PNorm = 185.1686, GNorm = 0.1016, lr_0 = 1.1819e-04
Loss = 1.3535e-03, PNorm = 185.1708, GNorm = 0.0409, lr_0 = 1.1811e-04
Loss = 1.4502e-03, PNorm = 185.1715, GNorm = 0.1151, lr_0 = 1.1803e-04
Loss = 2.5569e-03, PNorm = 185.1737, GNorm = 0.0704, lr_0 = 1.1795e-04
Loss = 1.8802e-03, PNorm = 185.1740, GNorm = 0.0373, lr_0 = 1.1787e-04
Validation mae = 0.120950
Epoch 28
Loss = 1.6720e-03, PNorm = 185.1740, GNorm = 0.0535, lr_0 = 1.1779e-04
Loss = 2.7047e-03, PNorm = 185.1749, GNorm = 0.4475, lr_0 = 1.1771e-04
Loss = 2.4186e-03, PNorm = 185.1774, GNorm = 0.0804, lr_0 = 1.1763e-04
Loss = 5.5502e-03, PNorm = 185.1794, GNorm = 0.0951, lr_0 = 1.1755e-04
Loss = 1.3458e-03, PNorm = 185.1794, GNorm = 0.1991, lr_0 = 1.1747e-04
Loss = 1.6150e-03, PNorm = 185.1800, GNorm = 0.1200, lr_0 = 1.1739e-04
Loss = 1.1594e-03, PNorm = 185.1815, GNorm = 0.0420, lr_0 = 1.1730e-04
Loss = 3.2787e-03, PNorm = 185.1827, GNorm = 0.1216, lr_0 = 1.1722e-04
Loss = 1.1730e-03, PNorm = 185.1840, GNorm = 0.0699, lr_0 = 1.1714e-04
Loss = 2.7481e-03, PNorm = 185.1858, GNorm = 0.1190, lr_0 = 1.1706e-04
Loss = 2.2712e-03, PNorm = 185.1870, GNorm = 0.0720, lr_0 = 1.1698e-04
Loss = 1.3721e-03, PNorm = 185.1885, GNorm = 0.1303, lr_0 = 1.1690e-04
Loss = 4.1637e-03, PNorm = 185.1891, GNorm = 0.0402, lr_0 = 1.1682e-04
Loss = 2.8781e-03, PNorm = 185.1908, GNorm = 0.0448, lr_0 = 1.1674e-04
Loss = 1.8097e-03, PNorm = 185.1923, GNorm = 0.1517, lr_0 = 1.1666e-04
Loss = 2.2340e-03, PNorm = 185.1927, GNorm = 0.1385, lr_0 = 1.1658e-04
Loss = 1.1483e-03, PNorm = 185.1936, GNorm = 0.0252, lr_0 = 1.1650e-04
Loss = 1.5081e-03, PNorm = 185.1949, GNorm = 0.0624, lr_0 = 1.1642e-04
Loss = 1.7103e-03, PNorm = 185.1957, GNorm = 0.0691, lr_0 = 1.1634e-04
Loss = 2.0018e-03, PNorm = 185.1971, GNorm = 0.1546, lr_0 = 1.1626e-04
Loss = 4.4263e-03, PNorm = 185.1991, GNorm = 0.0931, lr_0 = 1.1618e-04
Loss = 3.7570e-03, PNorm = 185.2005, GNorm = 0.1508, lr_0 = 1.1611e-04
Loss = 3.1249e-03, PNorm = 185.2027, GNorm = 0.0827, lr_0 = 1.1603e-04
Loss = 2.2656e-03, PNorm = 185.2038, GNorm = 0.1414, lr_0 = 1.1595e-04
Loss = 1.4584e-03, PNorm = 185.2046, GNorm = 0.1425, lr_0 = 1.1587e-04
Loss = 2.6645e-03, PNorm = 185.2052, GNorm = 0.0488, lr_0 = 1.1579e-04
Loss = 1.2342e-02, PNorm = 185.2074, GNorm = 0.1107, lr_0 = 1.1571e-04
Loss = 1.5845e-03, PNorm = 185.2071, GNorm = 0.0715, lr_0 = 1.1563e-04
Loss = 2.5078e-03, PNorm = 185.2076, GNorm = 0.1358, lr_0 = 1.1555e-04
Loss = 2.2518e-03, PNorm = 185.2075, GNorm = 0.0689, lr_0 = 1.1547e-04
Loss = 3.7148e-03, PNorm = 185.2085, GNorm = 0.0547, lr_0 = 1.1539e-04
Loss = 5.0877e-03, PNorm = 185.2111, GNorm = 0.2182, lr_0 = 1.1531e-04
Loss = 2.8508e-03, PNorm = 185.2141, GNorm = 0.0610, lr_0 = 1.1523e-04
Loss = 2.6512e-03, PNorm = 185.2149, GNorm = 0.2019, lr_0 = 1.1515e-04
Loss = 2.0331e-03, PNorm = 185.2158, GNorm = 0.0519, lr_0 = 1.1508e-04
Loss = 2.1321e-03, PNorm = 185.2171, GNorm = 0.1676, lr_0 = 1.1500e-04
Loss = 5.0258e-03, PNorm = 185.2189, GNorm = 0.0775, lr_0 = 1.1492e-04
Loss = 4.1595e-03, PNorm = 185.2203, GNorm = 0.0905, lr_0 = 1.1484e-04
Loss = 4.3762e-03, PNorm = 185.2221, GNorm = 0.7379, lr_0 = 1.1476e-04
Loss = 2.2029e-03, PNorm = 185.2245, GNorm = 0.1562, lr_0 = 1.1468e-04
Loss = 1.3759e-03, PNorm = 185.2256, GNorm = 0.0868, lr_0 = 1.1460e-04
Loss = 4.7651e-03, PNorm = 185.2255, GNorm = 0.0529, lr_0 = 1.1452e-04
Loss = 6.4757e-03, PNorm = 185.2245, GNorm = 0.1450, lr_0 = 1.1445e-04
Loss = 4.1581e-03, PNorm = 185.2259, GNorm = 0.1101, lr_0 = 1.1437e-04
Loss = 1.8846e-03, PNorm = 185.2278, GNorm = 0.0613, lr_0 = 1.1429e-04
Loss = 6.0271e-03, PNorm = 185.2287, GNorm = 0.7222, lr_0 = 1.1421e-04
Loss = 3.0347e-03, PNorm = 185.2299, GNorm = 0.0480, lr_0 = 1.1413e-04
Loss = 1.3852e-03, PNorm = 185.2301, GNorm = 0.0580, lr_0 = 1.1405e-04
Loss = 2.9832e-03, PNorm = 185.2311, GNorm = 0.0745, lr_0 = 1.1398e-04
Loss = 2.0263e-03, PNorm = 185.2323, GNorm = 0.2046, lr_0 = 1.1390e-04
Loss = 5.0873e-03, PNorm = 185.2343, GNorm = 0.0636, lr_0 = 1.1382e-04
Loss = 1.6342e-03, PNorm = 185.2361, GNorm = 0.0734, lr_0 = 1.1374e-04
Loss = 4.4514e-03, PNorm = 185.2386, GNorm = 0.0824, lr_0 = 1.1366e-04
Loss = 2.8933e-03, PNorm = 185.2393, GNorm = 0.1479, lr_0 = 1.1359e-04
Loss = 2.0636e-03, PNorm = 185.2402, GNorm = 0.0805, lr_0 = 1.1351e-04
Loss = 9.5629e-04, PNorm = 185.2417, GNorm = 0.0640, lr_0 = 1.1343e-04
Loss = 6.7181e-03, PNorm = 185.2424, GNorm = 0.8398, lr_0 = 1.1335e-04
Loss = 1.7374e-03, PNorm = 185.2419, GNorm = 0.1227, lr_0 = 1.1328e-04
Loss = 2.9444e-03, PNorm = 185.2437, GNorm = 0.0515, lr_0 = 1.1320e-04
Loss = 1.4003e-03, PNorm = 185.2456, GNorm = 0.1201, lr_0 = 1.1312e-04
Loss = 2.2103e-03, PNorm = 185.2469, GNorm = 0.0428, lr_0 = 1.1304e-04
Loss = 2.2510e-03, PNorm = 185.2479, GNorm = 0.0441, lr_0 = 1.1297e-04
Loss = 2.3542e-03, PNorm = 185.2489, GNorm = 0.0494, lr_0 = 1.1289e-04
Loss = 2.6208e-03, PNorm = 185.2505, GNorm = 0.1160, lr_0 = 1.1281e-04
Loss = 3.3833e-03, PNorm = 185.2525, GNorm = 0.0636, lr_0 = 1.1273e-04
Loss = 4.8431e-03, PNorm = 185.2537, GNorm = 0.2630, lr_0 = 1.1266e-04
Loss = 2.2306e-03, PNorm = 185.2547, GNorm = 0.1000, lr_0 = 1.1258e-04
Loss = 3.9427e-03, PNorm = 185.2556, GNorm = 0.0561, lr_0 = 1.1250e-04
Loss = 1.7434e-03, PNorm = 185.2565, GNorm = 0.1502, lr_0 = 1.1243e-04
Loss = 1.4232e-03, PNorm = 185.2586, GNorm = 0.1787, lr_0 = 1.1235e-04
Loss = 1.0347e-03, PNorm = 185.2592, GNorm = 0.0930, lr_0 = 1.1227e-04
Loss = 1.6087e-03, PNorm = 185.2598, GNorm = 0.0582, lr_0 = 1.1219e-04
Loss = 3.5690e-03, PNorm = 185.2604, GNorm = 0.0848, lr_0 = 1.1212e-04
Loss = 1.5318e-03, PNorm = 185.2611, GNorm = 0.0570, lr_0 = 1.1204e-04
Loss = 4.2647e-03, PNorm = 185.2628, GNorm = 0.0622, lr_0 = 1.1196e-04
Loss = 2.4409e-03, PNorm = 185.2638, GNorm = 0.1643, lr_0 = 1.1189e-04
Loss = 9.5133e-04, PNorm = 185.2649, GNorm = 0.1087, lr_0 = 1.1181e-04
Loss = 2.9966e-03, PNorm = 185.2664, GNorm = 0.0624, lr_0 = 1.1173e-04
Loss = 1.0253e-03, PNorm = 185.2684, GNorm = 0.1495, lr_0 = 1.1166e-04
Loss = 2.8974e-03, PNorm = 185.2702, GNorm = 0.0938, lr_0 = 1.1158e-04
Loss = 3.9395e-03, PNorm = 185.2704, GNorm = 0.0826, lr_0 = 1.1150e-04
Loss = 1.5186e-03, PNorm = 185.2721, GNorm = 0.1742, lr_0 = 1.1143e-04
Loss = 2.1386e-03, PNorm = 185.2744, GNorm = 0.0948, lr_0 = 1.1135e-04
Loss = 1.3610e-03, PNorm = 185.2764, GNorm = 0.0793, lr_0 = 1.1128e-04
Loss = 8.0290e-03, PNorm = 185.2783, GNorm = 0.3927, lr_0 = 1.1120e-04
Loss = 4.1152e-03, PNorm = 185.2809, GNorm = 0.0516, lr_0 = 1.1112e-04
Loss = 2.0643e-03, PNorm = 185.2822, GNorm = 0.0779, lr_0 = 1.1105e-04
Loss = 2.9598e-03, PNorm = 185.2828, GNorm = 0.0669, lr_0 = 1.1097e-04
Loss = 2.9184e-03, PNorm = 185.2835, GNorm = 0.0737, lr_0 = 1.1089e-04
Loss = 3.7365e-03, PNorm = 185.2842, GNorm = 0.1552, lr_0 = 1.1082e-04
Loss = 9.0711e-03, PNorm = 185.2829, GNorm = 0.1611, lr_0 = 1.1074e-04
Loss = 2.4330e-03, PNorm = 185.2827, GNorm = 0.0811, lr_0 = 1.1067e-04
Loss = 6.2913e-03, PNorm = 185.2828, GNorm = 0.0538, lr_0 = 1.1059e-04
Loss = 1.0552e-03, PNorm = 185.2837, GNorm = 0.1018, lr_0 = 1.1052e-04
Loss = 1.3242e-03, PNorm = 185.2859, GNorm = 0.0862, lr_0 = 1.1044e-04
Loss = 4.9873e-03, PNorm = 185.2881, GNorm = 0.1758, lr_0 = 1.1036e-04
Loss = 3.2694e-03, PNorm = 185.2892, GNorm = 0.0978, lr_0 = 1.1029e-04
Loss = 1.0691e-03, PNorm = 185.2895, GNorm = 0.1490, lr_0 = 1.1021e-04
Loss = 3.9459e-03, PNorm = 185.2897, GNorm = 0.1140, lr_0 = 1.1014e-04
Loss = 5.4323e-03, PNorm = 185.2906, GNorm = 0.2933, lr_0 = 1.1006e-04
Loss = 4.4629e-03, PNorm = 185.2918, GNorm = 0.0779, lr_0 = 1.0999e-04
Loss = 1.5895e-03, PNorm = 185.2926, GNorm = 0.1785, lr_0 = 1.0991e-04
Loss = 3.7505e-03, PNorm = 185.2944, GNorm = 0.1300, lr_0 = 1.0984e-04
Loss = 2.2594e-03, PNorm = 185.2951, GNorm = 0.1138, lr_0 = 1.0976e-04
Loss = 1.5900e-03, PNorm = 185.2956, GNorm = 0.1141, lr_0 = 1.0969e-04
Loss = 1.9753e-03, PNorm = 185.2975, GNorm = 0.1100, lr_0 = 1.0961e-04
Loss = 5.0853e-03, PNorm = 185.2993, GNorm = 0.1033, lr_0 = 1.0954e-04
Loss = 5.7631e-03, PNorm = 185.3011, GNorm = 0.0894, lr_0 = 1.0946e-04
Loss = 3.5298e-03, PNorm = 185.3028, GNorm = 0.0979, lr_0 = 1.0939e-04
Loss = 3.5228e-03, PNorm = 185.3039, GNorm = 0.1014, lr_0 = 1.0931e-04
Loss = 1.0650e-03, PNorm = 185.3044, GNorm = 0.0673, lr_0 = 1.0924e-04
Loss = 2.2216e-03, PNorm = 185.3060, GNorm = 0.1522, lr_0 = 1.0916e-04
Loss = 1.9909e-03, PNorm = 185.3082, GNorm = 0.0983, lr_0 = 1.0909e-04
Loss = 3.1368e-03, PNorm = 185.3103, GNorm = 0.0578, lr_0 = 1.0901e-04
Loss = 3.0269e-03, PNorm = 185.3125, GNorm = 0.1127, lr_0 = 1.0894e-04
Loss = 1.5820e-03, PNorm = 185.3138, GNorm = 0.0596, lr_0 = 1.0886e-04
Loss = 4.4842e-03, PNorm = 185.3155, GNorm = 0.0831, lr_0 = 1.0879e-04
Loss = 6.6108e-03, PNorm = 185.3175, GNorm = 0.1175, lr_0 = 1.0871e-04
Loss = 1.7251e-03, PNorm = 185.3183, GNorm = 0.0748, lr_0 = 1.0864e-04
Loss = 3.3436e-03, PNorm = 185.3196, GNorm = 0.0811, lr_0 = 1.0856e-04
Validation mae = 0.121012
Epoch 29
Loss = 1.2512e-03, PNorm = 185.3207, GNorm = 0.0628, lr_0 = 1.0849e-04
Loss = 2.9833e-03, PNorm = 185.3216, GNorm = 0.0600, lr_0 = 1.0841e-04
Loss = 6.2630e-03, PNorm = 185.3230, GNorm = 0.1355, lr_0 = 1.0834e-04
Loss = 4.5108e-03, PNorm = 185.3250, GNorm = 0.0669, lr_0 = 1.0827e-04
Loss = 3.4903e-03, PNorm = 185.3262, GNorm = 0.0758, lr_0 = 1.0819e-04
Loss = 3.4245e-03, PNorm = 185.3287, GNorm = 0.0359, lr_0 = 1.0812e-04
Loss = 5.4770e-03, PNorm = 185.3310, GNorm = 0.1366, lr_0 = 1.0804e-04
Loss = 1.4657e-03, PNorm = 185.3325, GNorm = 0.0666, lr_0 = 1.0797e-04
Loss = 2.1621e-03, PNorm = 185.3342, GNorm = 0.0452, lr_0 = 1.0790e-04
Loss = 1.1957e-03, PNorm = 185.3364, GNorm = 0.0867, lr_0 = 1.0782e-04
Loss = 6.5685e-03, PNorm = 185.3378, GNorm = 0.1191, lr_0 = 1.0775e-04
Loss = 1.1678e-03, PNorm = 185.3386, GNorm = 0.0853, lr_0 = 1.0767e-04
Loss = 1.2260e-03, PNorm = 185.3394, GNorm = 0.0533, lr_0 = 1.0760e-04
Loss = 2.4479e-03, PNorm = 185.3397, GNorm = 0.0683, lr_0 = 1.0753e-04
Loss = 1.8304e-03, PNorm = 185.3407, GNorm = 0.0756, lr_0 = 1.0745e-04
Loss = 2.2635e-03, PNorm = 185.3423, GNorm = 0.0862, lr_0 = 1.0738e-04
Loss = 4.2818e-03, PNorm = 185.3436, GNorm = 0.3106, lr_0 = 1.0731e-04
Loss = 1.7090e-03, PNorm = 185.3457, GNorm = 0.1051, lr_0 = 1.0723e-04
Loss = 8.2687e-04, PNorm = 185.3468, GNorm = 0.0901, lr_0 = 1.0716e-04
Loss = 1.0866e-03, PNorm = 185.3485, GNorm = 0.0893, lr_0 = 1.0709e-04
Loss = 2.4917e-03, PNorm = 185.3509, GNorm = 0.1585, lr_0 = 1.0701e-04
Loss = 1.0634e-03, PNorm = 185.3525, GNorm = 0.0706, lr_0 = 1.0694e-04
Loss = 3.8065e-03, PNorm = 185.3532, GNorm = 0.1795, lr_0 = 1.0687e-04
Loss = 1.4905e-03, PNorm = 185.3545, GNorm = 0.2426, lr_0 = 1.0679e-04
Loss = 7.7630e-04, PNorm = 185.3559, GNorm = 0.0962, lr_0 = 1.0672e-04
Loss = 2.9083e-03, PNorm = 185.3579, GNorm = 0.1165, lr_0 = 1.0665e-04
Loss = 3.1093e-03, PNorm = 185.3595, GNorm = 0.0612, lr_0 = 1.0657e-04
Loss = 1.1449e-03, PNorm = 185.3617, GNorm = 0.0886, lr_0 = 1.0650e-04
Loss = 3.0789e-03, PNorm = 185.3620, GNorm = 0.0819, lr_0 = 1.0643e-04
Loss = 1.0273e-03, PNorm = 185.3635, GNorm = 0.0458, lr_0 = 1.0635e-04
Loss = 2.0009e-03, PNorm = 185.3645, GNorm = 0.1136, lr_0 = 1.0628e-04
Loss = 5.5629e-03, PNorm = 185.3654, GNorm = 0.1601, lr_0 = 1.0621e-04
Loss = 5.5143e-03, PNorm = 185.3662, GNorm = 0.0473, lr_0 = 1.0614e-04
Loss = 2.6012e-03, PNorm = 185.3674, GNorm = 0.0855, lr_0 = 1.0606e-04
Loss = 1.2827e-03, PNorm = 185.3683, GNorm = 0.0345, lr_0 = 1.0599e-04
Loss = 4.4861e-03, PNorm = 185.3699, GNorm = 0.0840, lr_0 = 1.0592e-04
Loss = 1.4019e-03, PNorm = 185.3718, GNorm = 0.1299, lr_0 = 1.0585e-04
Loss = 1.2263e-03, PNorm = 185.3727, GNorm = 0.1361, lr_0 = 1.0577e-04
Loss = 6.7822e-03, PNorm = 185.3736, GNorm = 0.0983, lr_0 = 1.0570e-04
Loss = 8.1357e-04, PNorm = 185.3738, GNorm = 0.0548, lr_0 = 1.0563e-04
Loss = 1.7898e-03, PNorm = 185.3756, GNorm = 0.0750, lr_0 = 1.0556e-04
Loss = 2.9197e-03, PNorm = 185.3768, GNorm = 0.0960, lr_0 = 1.0548e-04
Loss = 2.7344e-03, PNorm = 185.3790, GNorm = 0.0478, lr_0 = 1.0541e-04
Loss = 3.5010e-03, PNorm = 185.3792, GNorm = 0.0772, lr_0 = 1.0534e-04
Loss = 5.3047e-03, PNorm = 185.3792, GNorm = 0.0706, lr_0 = 1.0527e-04
Loss = 2.7181e-03, PNorm = 185.3792, GNorm = 0.1929, lr_0 = 1.0519e-04
Loss = 2.0329e-03, PNorm = 185.3805, GNorm = 0.0947, lr_0 = 1.0512e-04
Loss = 2.2595e-03, PNorm = 185.3821, GNorm = 0.0485, lr_0 = 1.0505e-04
Loss = 6.0062e-03, PNorm = 185.3822, GNorm = 0.0942, lr_0 = 1.0498e-04
Loss = 2.0889e-03, PNorm = 185.3842, GNorm = 0.0655, lr_0 = 1.0491e-04
Loss = 9.9854e-04, PNorm = 185.3865, GNorm = 0.0599, lr_0 = 1.0483e-04
Loss = 1.9051e-03, PNorm = 185.3878, GNorm = 0.2040, lr_0 = 1.0476e-04
Loss = 5.2889e-03, PNorm = 185.3894, GNorm = 0.0719, lr_0 = 1.0469e-04
Loss = 4.8213e-03, PNorm = 185.3910, GNorm = 0.0760, lr_0 = 1.0462e-04
Loss = 4.6109e-03, PNorm = 185.3918, GNorm = 0.0708, lr_0 = 1.0455e-04
Loss = 3.1176e-03, PNorm = 185.3925, GNorm = 0.6771, lr_0 = 1.0448e-04
Loss = 1.7427e-02, PNorm = 185.3939, GNorm = 0.2010, lr_0 = 1.0440e-04
Loss = 3.2499e-03, PNorm = 185.3934, GNorm = 0.1189, lr_0 = 1.0433e-04
Loss = 3.3806e-03, PNorm = 185.3930, GNorm = 0.4110, lr_0 = 1.0426e-04
Loss = 1.6244e-03, PNorm = 185.3948, GNorm = 0.0654, lr_0 = 1.0419e-04
Loss = 1.3550e-03, PNorm = 185.3956, GNorm = 0.0937, lr_0 = 1.0412e-04
Loss = 3.4355e-03, PNorm = 185.3952, GNorm = 0.1486, lr_0 = 1.0405e-04
Loss = 3.4672e-03, PNorm = 185.3965, GNorm = 0.0947, lr_0 = 1.0398e-04
Loss = 9.1589e-03, PNorm = 185.3977, GNorm = 0.0976, lr_0 = 1.0391e-04
Loss = 6.5581e-03, PNorm = 185.3996, GNorm = 0.0892, lr_0 = 1.0383e-04
Loss = 1.6386e-03, PNorm = 185.4016, GNorm = 0.0506, lr_0 = 1.0376e-04
Loss = 1.8110e-03, PNorm = 185.4020, GNorm = 0.0490, lr_0 = 1.0369e-04
Loss = 1.6016e-03, PNorm = 185.4024, GNorm = 0.0837, lr_0 = 1.0362e-04
Loss = 1.7866e-03, PNorm = 185.4035, GNorm = 0.0724, lr_0 = 1.0355e-04
Loss = 4.7321e-03, PNorm = 185.4039, GNorm = 0.1689, lr_0 = 1.0348e-04
Loss = 9.4726e-04, PNorm = 185.4056, GNorm = 0.0579, lr_0 = 1.0341e-04
Loss = 8.9947e-04, PNorm = 185.4070, GNorm = 0.0929, lr_0 = 1.0334e-04
Loss = 1.0215e-03, PNorm = 185.4078, GNorm = 0.0863, lr_0 = 1.0327e-04
Loss = 1.5843e-03, PNorm = 185.4092, GNorm = 0.0674, lr_0 = 1.0320e-04
Loss = 6.7522e-03, PNorm = 185.4106, GNorm = 0.1743, lr_0 = 1.0312e-04
Loss = 1.9577e-03, PNorm = 185.4112, GNorm = 0.1012, lr_0 = 1.0305e-04
Loss = 2.1625e-03, PNorm = 185.4118, GNorm = 0.0908, lr_0 = 1.0298e-04
Loss = 2.2866e-03, PNorm = 185.4130, GNorm = 0.0854, lr_0 = 1.0291e-04
Loss = 5.2629e-03, PNorm = 185.4150, GNorm = 0.0841, lr_0 = 1.0284e-04
Loss = 6.1541e-03, PNorm = 185.4147, GNorm = 0.1223, lr_0 = 1.0277e-04
Loss = 1.0070e-03, PNorm = 185.4162, GNorm = 0.1261, lr_0 = 1.0270e-04
Loss = 2.1854e-03, PNorm = 185.4184, GNorm = 0.1422, lr_0 = 1.0263e-04
Loss = 2.0450e-03, PNorm = 185.4197, GNorm = 0.0700, lr_0 = 1.0256e-04
Loss = 4.0046e-03, PNorm = 185.4206, GNorm = 0.1017, lr_0 = 1.0249e-04
Loss = 9.3807e-04, PNorm = 185.4221, GNorm = 0.0546, lr_0 = 1.0242e-04
Loss = 2.5552e-03, PNorm = 185.4225, GNorm = 0.1116, lr_0 = 1.0235e-04
Loss = 3.6305e-03, PNorm = 185.4228, GNorm = 0.0413, lr_0 = 1.0228e-04
Loss = 9.8807e-04, PNorm = 185.4233, GNorm = 0.0492, lr_0 = 1.0221e-04
Loss = 1.1144e-03, PNorm = 185.4254, GNorm = 0.0763, lr_0 = 1.0214e-04
Loss = 4.5708e-03, PNorm = 185.4266, GNorm = 0.1325, lr_0 = 1.0207e-04
Loss = 3.2282e-03, PNorm = 185.4284, GNorm = 0.1037, lr_0 = 1.0200e-04
Loss = 2.8842e-03, PNorm = 185.4298, GNorm = 0.0895, lr_0 = 1.0193e-04
Loss = 2.0319e-03, PNorm = 185.4306, GNorm = 0.0555, lr_0 = 1.0186e-04
Loss = 1.6304e-03, PNorm = 185.4317, GNorm = 0.4438, lr_0 = 1.0179e-04
Loss = 2.4131e-03, PNorm = 185.4330, GNorm = 0.0937, lr_0 = 1.0172e-04
Loss = 1.4314e-03, PNorm = 185.4343, GNorm = 0.0822, lr_0 = 1.0165e-04
Loss = 9.7291e-04, PNorm = 185.4352, GNorm = 0.0971, lr_0 = 1.0158e-04
Loss = 1.4465e-03, PNorm = 185.4364, GNorm = 0.1589, lr_0 = 1.0151e-04
Loss = 3.4516e-03, PNorm = 185.4375, GNorm = 0.0747, lr_0 = 1.0144e-04
Loss = 6.5686e-03, PNorm = 185.4376, GNorm = 0.0650, lr_0 = 1.0137e-04
Loss = 3.2655e-03, PNorm = 185.4397, GNorm = 0.0449, lr_0 = 1.0130e-04
Loss = 1.4133e-03, PNorm = 185.4412, GNorm = 0.0482, lr_0 = 1.0123e-04
Loss = 2.1776e-03, PNorm = 185.4413, GNorm = 0.0418, lr_0 = 1.0116e-04
Loss = 1.2436e-03, PNorm = 185.4416, GNorm = 0.1019, lr_0 = 1.0110e-04
Loss = 1.4562e-03, PNorm = 185.4422, GNorm = 0.0461, lr_0 = 1.0103e-04
Loss = 7.2309e-04, PNorm = 185.4435, GNorm = 0.0528, lr_0 = 1.0096e-04
Loss = 2.0983e-03, PNorm = 185.4452, GNorm = 0.1098, lr_0 = 1.0089e-04
Loss = 2.0242e-03, PNorm = 185.4471, GNorm = 0.2266, lr_0 = 1.0082e-04
Loss = 1.2131e-03, PNorm = 185.4482, GNorm = 0.1246, lr_0 = 1.0075e-04
Loss = 4.0708e-03, PNorm = 185.4497, GNorm = 0.0503, lr_0 = 1.0068e-04
Loss = 9.8788e-04, PNorm = 185.4515, GNorm = 0.0411, lr_0 = 1.0061e-04
Loss = 2.0440e-03, PNorm = 185.4524, GNorm = 0.1058, lr_0 = 1.0054e-04
Loss = 1.6433e-03, PNorm = 185.4529, GNorm = 0.1985, lr_0 = 1.0047e-04
Loss = 2.0142e-03, PNorm = 185.4534, GNorm = 0.0490, lr_0 = 1.0041e-04
Loss = 2.3881e-03, PNorm = 185.4546, GNorm = 0.1053, lr_0 = 1.0034e-04
Loss = 4.0885e-03, PNorm = 185.4557, GNorm = 0.0843, lr_0 = 1.0027e-04
Loss = 1.0694e-03, PNorm = 185.4567, GNorm = 0.0508, lr_0 = 1.0020e-04
Loss = 1.4321e-03, PNorm = 185.4575, GNorm = 0.1254, lr_0 = 1.0013e-04
Loss = 5.1702e-03, PNorm = 185.4590, GNorm = 0.9051, lr_0 = 1.0006e-04
Loss = 4.6656e-03, PNorm = 185.4590, GNorm = 0.0549, lr_0 = 1.0000e-04
Validation mae = 0.121059
Model 0 best validation mae = 0.120950 on epoch 27
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.119205
Ensemble test mae = 0.119205
Fold 4
Splitting data with seed 4
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN()
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=2048, out_features=2100, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=2100, out_features=2100, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.0, inplace=False)
    (7): Linear(in_features=2100, out_features=1, bias=True)
  )
)
Number of parameters = 8,717,101
Moving model to cuda
Epoch 0
Loss = 9.2425e-01, PNorm = 64.6286, GNorm = 1.7893, lr_0 = 1.0413e-04
Loss = 7.8276e-01, PNorm = 64.6396, GNorm = 2.0474, lr_0 = 1.0788e-04
Loss = 8.0519e-01, PNorm = 64.6499, GNorm = 2.0196, lr_0 = 1.1163e-04
Loss = 7.6696e-01, PNorm = 64.6593, GNorm = 2.4868, lr_0 = 1.1537e-04
Loss = 7.7633e-01, PNorm = 64.6695, GNorm = 2.6685, lr_0 = 1.1913e-04
Loss = 6.3360e-01, PNorm = 64.6795, GNorm = 1.6383, lr_0 = 1.2287e-04
Loss = 7.3110e-01, PNorm = 64.6890, GNorm = 2.4972, lr_0 = 1.2663e-04
Loss = 6.5145e-01, PNorm = 64.6988, GNorm = 2.7605, lr_0 = 1.3038e-04
Loss = 7.1921e-01, PNorm = 64.7098, GNorm = 3.2510, lr_0 = 1.3413e-04
Loss = 6.2905e-01, PNorm = 64.7199, GNorm = 3.1624, lr_0 = 1.3788e-04
Loss = 6.4601e-01, PNorm = 64.7303, GNorm = 2.0426, lr_0 = 1.4163e-04
Loss = 6.0774e-01, PNorm = 64.7406, GNorm = 2.2507, lr_0 = 1.4537e-04
Loss = 6.3913e-01, PNorm = 64.7506, GNorm = 2.0160, lr_0 = 1.4913e-04
Loss = 5.9089e-01, PNorm = 64.7622, GNorm = 2.4678, lr_0 = 1.5288e-04
Loss = 6.2581e-01, PNorm = 64.7743, GNorm = 1.9579, lr_0 = 1.5662e-04
Loss = 5.9927e-01, PNorm = 64.7857, GNorm = 2.4405, lr_0 = 1.6038e-04
Loss = 6.8215e-01, PNorm = 64.7982, GNorm = 1.9716, lr_0 = 1.6412e-04
Loss = 5.1175e-01, PNorm = 64.8110, GNorm = 2.5149, lr_0 = 1.6788e-04
Loss = 5.6668e-01, PNorm = 64.8234, GNorm = 2.9951, lr_0 = 1.7163e-04
Loss = 6.3164e-01, PNorm = 64.8371, GNorm = 1.9740, lr_0 = 1.7538e-04
Loss = 5.9399e-01, PNorm = 64.8488, GNorm = 2.2899, lr_0 = 1.7913e-04
Loss = 6.2591e-01, PNorm = 64.8630, GNorm = 2.6610, lr_0 = 1.8288e-04
Loss = 6.3521e-01, PNorm = 64.8797, GNorm = 5.2677, lr_0 = 1.8662e-04
Loss = 6.3423e-01, PNorm = 64.8948, GNorm = 2.2169, lr_0 = 1.9038e-04
Loss = 5.6218e-01, PNorm = 64.9124, GNorm = 1.6573, lr_0 = 1.9413e-04
Loss = 6.4335e-01, PNorm = 64.9283, GNorm = 2.1827, lr_0 = 1.9788e-04
Loss = 5.8085e-01, PNorm = 64.9435, GNorm = 3.6747, lr_0 = 2.0163e-04
Loss = 6.0001e-01, PNorm = 64.9577, GNorm = 1.9970, lr_0 = 2.0537e-04
Loss = 5.4426e-01, PNorm = 64.9748, GNorm = 1.6917, lr_0 = 2.0913e-04
Loss = 5.8782e-01, PNorm = 64.9911, GNorm = 2.5219, lr_0 = 2.1288e-04
Loss = 5.9803e-01, PNorm = 65.0086, GNorm = 1.8781, lr_0 = 2.1663e-04
Loss = 5.5907e-01, PNorm = 65.0242, GNorm = 1.7773, lr_0 = 2.2038e-04
Loss = 5.5347e-01, PNorm = 65.0412, GNorm = 1.7957, lr_0 = 2.2412e-04
Loss = 5.5959e-01, PNorm = 65.0592, GNorm = 1.9487, lr_0 = 2.2787e-04
Loss = 5.5660e-01, PNorm = 65.0783, GNorm = 2.1595, lr_0 = 2.3163e-04
Loss = 5.5844e-01, PNorm = 65.0930, GNorm = 2.2183, lr_0 = 2.3538e-04
Loss = 6.3607e-01, PNorm = 65.1119, GNorm = 2.6770, lr_0 = 2.3913e-04
Loss = 5.7369e-01, PNorm = 65.1353, GNorm = 1.9943, lr_0 = 2.4288e-04
Loss = 5.3292e-01, PNorm = 65.1549, GNorm = 1.3991, lr_0 = 2.4662e-04
Loss = 5.4372e-01, PNorm = 65.1762, GNorm = 1.7705, lr_0 = 2.5038e-04
Loss = 5.9494e-01, PNorm = 65.1979, GNorm = 1.6995, lr_0 = 2.5413e-04
Loss = 5.4688e-01, PNorm = 65.2193, GNorm = 1.5305, lr_0 = 2.5788e-04
Loss = 4.5658e-01, PNorm = 65.2390, GNorm = 2.6712, lr_0 = 2.6163e-04
Loss = 5.6500e-01, PNorm = 65.2585, GNorm = 1.4748, lr_0 = 2.6537e-04
Loss = 5.3279e-01, PNorm = 65.2806, GNorm = 2.2577, lr_0 = 2.6912e-04
Loss = 4.9511e-01, PNorm = 65.3059, GNorm = 1.6546, lr_0 = 2.7288e-04
Loss = 5.2111e-01, PNorm = 65.3298, GNorm = 1.7064, lr_0 = 2.7663e-04
Loss = 5.2601e-01, PNorm = 65.3513, GNorm = 1.6647, lr_0 = 2.8038e-04
Loss = 5.5097e-01, PNorm = 65.3788, GNorm = 1.8422, lr_0 = 2.8413e-04
Loss = 5.7432e-01, PNorm = 65.4073, GNorm = 2.4134, lr_0 = 2.8787e-04
Loss = 6.0520e-01, PNorm = 65.4396, GNorm = 1.9509, lr_0 = 2.9163e-04
Loss = 5.1304e-01, PNorm = 65.4698, GNorm = 1.6779, lr_0 = 2.9538e-04
Loss = 5.2637e-01, PNorm = 65.5002, GNorm = 1.9593, lr_0 = 2.9913e-04
Loss = 5.1721e-01, PNorm = 65.5307, GNorm = 1.7279, lr_0 = 3.0288e-04
Loss = 5.0200e-01, PNorm = 65.5598, GNorm = 1.4499, lr_0 = 3.0662e-04
Loss = 4.9245e-01, PNorm = 65.5894, GNorm = 1.4206, lr_0 = 3.1037e-04
Loss = 5.6211e-01, PNorm = 65.6216, GNorm = 1.4773, lr_0 = 3.1413e-04
Loss = 5.2226e-01, PNorm = 65.6541, GNorm = 1.6531, lr_0 = 3.1788e-04
Loss = 5.1498e-01, PNorm = 65.6839, GNorm = 1.4143, lr_0 = 3.2163e-04
Loss = 5.8588e-01, PNorm = 65.7128, GNorm = 1.9861, lr_0 = 3.2538e-04
Loss = 4.8764e-01, PNorm = 65.7457, GNorm = 1.4949, lr_0 = 3.2912e-04
Loss = 5.5436e-01, PNorm = 65.7782, GNorm = 1.5216, lr_0 = 3.3288e-04
Loss = 5.5133e-01, PNorm = 65.8131, GNorm = 1.4834, lr_0 = 3.3663e-04
Loss = 5.2647e-01, PNorm = 65.8396, GNorm = 1.1713, lr_0 = 3.4038e-04
Loss = 5.0135e-01, PNorm = 65.8764, GNorm = 1.8969, lr_0 = 3.4413e-04
Loss = 4.9542e-01, PNorm = 65.9102, GNorm = 1.6935, lr_0 = 3.4787e-04
Loss = 5.3170e-01, PNorm = 65.9368, GNorm = 1.3089, lr_0 = 3.5162e-04
Loss = 5.7976e-01, PNorm = 65.9728, GNorm = 1.5369, lr_0 = 3.5538e-04
Loss = 5.3783e-01, PNorm = 66.0099, GNorm = 1.1901, lr_0 = 3.5913e-04
Loss = 4.6809e-01, PNorm = 66.0421, GNorm = 1.0444, lr_0 = 3.6288e-04
Loss = 5.3382e-01, PNorm = 66.0744, GNorm = 1.5697, lr_0 = 3.6662e-04
Loss = 4.8309e-01, PNorm = 66.1074, GNorm = 1.5057, lr_0 = 3.7037e-04
Loss = 5.1920e-01, PNorm = 66.1433, GNorm = 1.5454, lr_0 = 3.7413e-04
Loss = 6.1466e-01, PNorm = 66.1772, GNorm = 1.4416, lr_0 = 3.7788e-04
Loss = 5.4267e-01, PNorm = 66.2140, GNorm = 1.5964, lr_0 = 3.8163e-04
Loss = 5.3251e-01, PNorm = 66.2549, GNorm = 1.7408, lr_0 = 3.8537e-04
Loss = 6.0247e-01, PNorm = 66.2965, GNorm = 1.4691, lr_0 = 3.8912e-04
Loss = 4.5548e-01, PNorm = 66.3400, GNorm = 1.1406, lr_0 = 3.9287e-04
Loss = 5.4148e-01, PNorm = 66.3745, GNorm = 1.5025, lr_0 = 3.9663e-04
Loss = 5.1642e-01, PNorm = 66.4188, GNorm = 1.3641, lr_0 = 4.0038e-04
Loss = 5.1487e-01, PNorm = 66.4623, GNorm = 1.1165, lr_0 = 4.0413e-04
Loss = 6.1404e-01, PNorm = 66.5080, GNorm = 1.5967, lr_0 = 4.0787e-04
Loss = 6.0804e-01, PNorm = 66.5566, GNorm = 1.4433, lr_0 = 4.1162e-04
Loss = 5.4508e-01, PNorm = 66.6073, GNorm = 1.9096, lr_0 = 4.1537e-04
Loss = 5.9249e-01, PNorm = 66.6564, GNorm = 1.3064, lr_0 = 4.1913e-04
Loss = 4.5028e-01, PNorm = 66.7065, GNorm = 1.3789, lr_0 = 4.2288e-04
Loss = 4.3581e-01, PNorm = 66.7509, GNorm = 1.3291, lr_0 = 4.2662e-04
Loss = 4.3382e-01, PNorm = 66.7965, GNorm = 1.2077, lr_0 = 4.3037e-04
Loss = 5.0161e-01, PNorm = 66.8405, GNorm = 1.4229, lr_0 = 4.3412e-04
Loss = 4.9835e-01, PNorm = 66.8877, GNorm = 1.7818, lr_0 = 4.3788e-04
Loss = 5.2107e-01, PNorm = 66.9359, GNorm = 1.2897, lr_0 = 4.4163e-04
Loss = 5.1047e-01, PNorm = 66.9961, GNorm = 2.2284, lr_0 = 4.4538e-04
Loss = 5.0242e-01, PNorm = 67.0416, GNorm = 1.3567, lr_0 = 4.4912e-04
Loss = 4.8665e-01, PNorm = 67.0906, GNorm = 2.0166, lr_0 = 4.5287e-04
Loss = 5.1138e-01, PNorm = 67.1421, GNorm = 1.4787, lr_0 = 4.5662e-04
Loss = 5.0160e-01, PNorm = 67.1918, GNorm = 1.2692, lr_0 = 4.6038e-04
Loss = 5.1682e-01, PNorm = 67.2422, GNorm = 1.3529, lr_0 = 4.6413e-04
Loss = 5.3324e-01, PNorm = 67.2939, GNorm = 0.8422, lr_0 = 4.6787e-04
Loss = 4.6099e-01, PNorm = 67.3440, GNorm = 1.0858, lr_0 = 4.7162e-04
Loss = 4.6931e-01, PNorm = 67.3945, GNorm = 1.4436, lr_0 = 4.7537e-04
Loss = 5.1185e-01, PNorm = 67.4476, GNorm = 1.3923, lr_0 = 4.7913e-04
Loss = 5.3964e-01, PNorm = 67.5037, GNorm = 1.2023, lr_0 = 4.8288e-04
Loss = 4.8004e-01, PNorm = 67.5608, GNorm = 1.3892, lr_0 = 4.8663e-04
Loss = 4.8584e-01, PNorm = 67.6204, GNorm = 1.4391, lr_0 = 4.9038e-04
Loss = 4.9569e-01, PNorm = 67.6802, GNorm = 1.5966, lr_0 = 4.9412e-04
Loss = 4.7431e-01, PNorm = 67.7427, GNorm = 2.0004, lr_0 = 4.9788e-04
Loss = 4.9071e-01, PNorm = 67.8074, GNorm = 1.2880, lr_0 = 5.0163e-04
Loss = 4.6151e-01, PNorm = 67.8762, GNorm = 1.8651, lr_0 = 5.0538e-04
Loss = 5.0349e-01, PNorm = 67.9361, GNorm = 1.3116, lr_0 = 5.0913e-04
Loss = 5.5539e-01, PNorm = 68.0032, GNorm = 1.0892, lr_0 = 5.1287e-04
Loss = 5.3906e-01, PNorm = 68.0660, GNorm = 1.1375, lr_0 = 5.1663e-04
Loss = 4.7799e-01, PNorm = 68.1274, GNorm = 1.7446, lr_0 = 5.2038e-04
Loss = 4.7864e-01, PNorm = 68.1860, GNorm = 1.1306, lr_0 = 5.2413e-04
Loss = 5.4068e-01, PNorm = 68.2520, GNorm = 1.5984, lr_0 = 5.2788e-04
Loss = 6.0977e-01, PNorm = 68.3185, GNorm = 1.7581, lr_0 = 5.3162e-04
Loss = 5.4041e-01, PNorm = 68.3908, GNorm = 1.4853, lr_0 = 5.3538e-04
Loss = 4.8737e-01, PNorm = 68.4675, GNorm = 1.4053, lr_0 = 5.3912e-04
Loss = 5.0752e-01, PNorm = 68.5339, GNorm = 0.9792, lr_0 = 5.4288e-04
Loss = 4.9935e-01, PNorm = 68.5953, GNorm = 0.8922, lr_0 = 5.4663e-04
Loss = 5.0103e-01, PNorm = 68.6543, GNorm = 0.9605, lr_0 = 5.5038e-04
Validation mae = 0.129463
Epoch 1
Loss = 3.9380e-01, PNorm = 68.7246, GNorm = 0.9859, lr_0 = 5.5413e-04
Loss = 3.6719e-01, PNorm = 68.7896, GNorm = 0.9347, lr_0 = 5.5787e-04
Loss = 3.8601e-01, PNorm = 68.8602, GNorm = 0.9691, lr_0 = 5.6163e-04
Loss = 3.8830e-01, PNorm = 68.9366, GNorm = 1.1263, lr_0 = 5.6538e-04
Loss = 3.9525e-01, PNorm = 69.0098, GNorm = 1.7320, lr_0 = 5.6913e-04
Loss = 4.4192e-01, PNorm = 69.0978, GNorm = 1.4164, lr_0 = 5.7288e-04
Loss = 4.0646e-01, PNorm = 69.1848, GNorm = 1.9175, lr_0 = 5.7662e-04
Loss = 3.5622e-01, PNorm = 69.2843, GNorm = 1.1025, lr_0 = 5.8038e-04
Loss = 3.5269e-01, PNorm = 69.3708, GNorm = 1.3433, lr_0 = 5.8413e-04
Loss = 3.4719e-01, PNorm = 69.4578, GNorm = 1.2251, lr_0 = 5.8788e-04
Loss = 3.6137e-01, PNorm = 69.5411, GNorm = 1.1062, lr_0 = 5.9163e-04
Loss = 3.5669e-01, PNorm = 69.6246, GNorm = 1.1011, lr_0 = 5.9538e-04
Loss = 3.8730e-01, PNorm = 69.7109, GNorm = 1.0619, lr_0 = 5.9913e-04
Loss = 3.4516e-01, PNorm = 69.8045, GNorm = 1.1146, lr_0 = 6.0288e-04
Loss = 2.9267e-01, PNorm = 69.8976, GNorm = 1.4222, lr_0 = 6.0663e-04
Loss = 3.8362e-01, PNorm = 69.9916, GNorm = 1.4829, lr_0 = 6.1038e-04
Loss = 3.3915e-01, PNorm = 70.0930, GNorm = 1.0850, lr_0 = 6.1413e-04
Loss = 4.0881e-01, PNorm = 70.1940, GNorm = 1.5562, lr_0 = 6.1788e-04
Loss = 3.9160e-01, PNorm = 70.3097, GNorm = 1.4073, lr_0 = 6.2163e-04
Loss = 4.9195e-01, PNorm = 70.4229, GNorm = 1.2801, lr_0 = 6.2538e-04
Loss = 3.3915e-01, PNorm = 70.5511, GNorm = 0.9631, lr_0 = 6.2913e-04
Loss = 4.2388e-01, PNorm = 70.6662, GNorm = 1.5651, lr_0 = 6.3288e-04
Loss = 4.0065e-01, PNorm = 70.7860, GNorm = 1.1275, lr_0 = 6.3663e-04
Loss = 4.2194e-01, PNorm = 70.9054, GNorm = 1.5032, lr_0 = 6.4038e-04
Loss = 4.6945e-01, PNorm = 71.0374, GNorm = 1.3910, lr_0 = 6.4413e-04
Loss = 4.5666e-01, PNorm = 71.1587, GNorm = 1.2179, lr_0 = 6.4788e-04
Loss = 3.7537e-01, PNorm = 71.2873, GNorm = 1.2402, lr_0 = 6.5163e-04
Loss = 4.6593e-01, PNorm = 71.4048, GNorm = 1.2427, lr_0 = 6.5538e-04
Loss = 4.1205e-01, PNorm = 71.5336, GNorm = 0.9004, lr_0 = 6.5913e-04
Loss = 3.6656e-01, PNorm = 71.6476, GNorm = 0.9230, lr_0 = 6.6288e-04
Loss = 3.9827e-01, PNorm = 71.7504, GNorm = 1.1335, lr_0 = 6.6663e-04
Loss = 3.9092e-01, PNorm = 71.8600, GNorm = 1.2216, lr_0 = 6.7038e-04
Loss = 3.3686e-01, PNorm = 71.9701, GNorm = 0.9487, lr_0 = 6.7413e-04
Loss = 4.3363e-01, PNorm = 72.0879, GNorm = 1.4398, lr_0 = 6.7788e-04
Loss = 3.8345e-01, PNorm = 72.2079, GNorm = 1.1242, lr_0 = 6.8163e-04
Loss = 4.3409e-01, PNorm = 72.3355, GNorm = 1.3196, lr_0 = 6.8538e-04
Loss = 3.6389e-01, PNorm = 72.4593, GNorm = 1.2559, lr_0 = 6.8913e-04
Loss = 4.2305e-01, PNorm = 72.5776, GNorm = 1.4777, lr_0 = 6.9288e-04
Loss = 4.1205e-01, PNorm = 72.7040, GNorm = 1.2727, lr_0 = 6.9663e-04
Loss = 3.8993e-01, PNorm = 72.8158, GNorm = 1.1985, lr_0 = 7.0038e-04
Loss = 3.8864e-01, PNorm = 72.9335, GNorm = 1.2866, lr_0 = 7.0413e-04
Loss = 4.2587e-01, PNorm = 73.0462, GNorm = 1.3892, lr_0 = 7.0788e-04
Loss = 3.5237e-01, PNorm = 73.1595, GNorm = 1.0994, lr_0 = 7.1163e-04
Loss = 4.2125e-01, PNorm = 73.2709, GNorm = 1.2234, lr_0 = 7.1538e-04
Loss = 3.3827e-01, PNorm = 73.3838, GNorm = 1.1652, lr_0 = 7.1913e-04
Loss = 4.2751e-01, PNorm = 73.5071, GNorm = 1.1509, lr_0 = 7.2288e-04
Loss = 4.3510e-01, PNorm = 73.6209, GNorm = 1.2693, lr_0 = 7.2663e-04
Loss = 4.2245e-01, PNorm = 73.7484, GNorm = 1.0365, lr_0 = 7.3038e-04
Loss = 3.9446e-01, PNorm = 73.8763, GNorm = 1.1880, lr_0 = 7.3413e-04
Loss = 3.9903e-01, PNorm = 74.0062, GNorm = 0.8405, lr_0 = 7.3788e-04
Loss = 4.4781e-01, PNorm = 74.1420, GNorm = 1.1613, lr_0 = 7.4163e-04
Loss = 4.6945e-01, PNorm = 74.2787, GNorm = 2.2403, lr_0 = 7.4538e-04
Loss = 4.3112e-01, PNorm = 74.4179, GNorm = 1.3686, lr_0 = 7.4913e-04
Loss = 4.1805e-01, PNorm = 74.5646, GNorm = 1.1433, lr_0 = 7.5288e-04
Loss = 4.6628e-01, PNorm = 74.7122, GNorm = 1.8037, lr_0 = 7.5663e-04
Loss = 3.8480e-01, PNorm = 74.8683, GNorm = 1.2603, lr_0 = 7.6038e-04
Loss = 4.4559e-01, PNorm = 74.9802, GNorm = 0.9153, lr_0 = 7.6413e-04
Loss = 4.0535e-01, PNorm = 75.1185, GNorm = 1.0717, lr_0 = 7.6788e-04
Loss = 4.0644e-01, PNorm = 75.2383, GNorm = 1.3510, lr_0 = 7.7163e-04
Loss = 4.8178e-01, PNorm = 75.3600, GNorm = 1.1713, lr_0 = 7.7538e-04
Loss = 4.4751e-01, PNorm = 75.4977, GNorm = 1.3656, lr_0 = 7.7913e-04
Loss = 4.1962e-01, PNorm = 75.6333, GNorm = 1.6951, lr_0 = 7.8288e-04
Loss = 4.6217e-01, PNorm = 75.7778, GNorm = 1.0312, lr_0 = 7.8663e-04
Loss = 4.4318e-01, PNorm = 75.9247, GNorm = 1.5944, lr_0 = 7.9038e-04
Loss = 3.4490e-01, PNorm = 76.0634, GNorm = 0.9940, lr_0 = 7.9413e-04
Loss = 4.6419e-01, PNorm = 76.1965, GNorm = 0.8561, lr_0 = 7.9788e-04
Loss = 4.7085e-01, PNorm = 76.3369, GNorm = 1.0644, lr_0 = 8.0163e-04
Loss = 3.8820e-01, PNorm = 76.4741, GNorm = 1.2904, lr_0 = 8.0538e-04
Loss = 4.4494e-01, PNorm = 76.6170, GNorm = 1.5235, lr_0 = 8.0913e-04
Loss = 3.8109e-01, PNorm = 76.7580, GNorm = 0.9243, lr_0 = 8.1288e-04
Loss = 4.3028e-01, PNorm = 76.9066, GNorm = 1.2607, lr_0 = 8.1663e-04
Loss = 4.6813e-01, PNorm = 77.0479, GNorm = 1.1155, lr_0 = 8.2038e-04
Loss = 4.6070e-01, PNorm = 77.2079, GNorm = 1.2460, lr_0 = 8.2413e-04
Loss = 3.7977e-01, PNorm = 77.3567, GNorm = 1.0655, lr_0 = 8.2788e-04
Loss = 4.5508e-01, PNorm = 77.4999, GNorm = 1.2651, lr_0 = 8.3163e-04
Loss = 3.9256e-01, PNorm = 77.6437, GNorm = 0.8310, lr_0 = 8.3538e-04
Loss = 4.5271e-01, PNorm = 77.7923, GNorm = 1.5467, lr_0 = 8.3913e-04
Loss = 4.6353e-01, PNorm = 77.9457, GNorm = 1.4124, lr_0 = 8.4288e-04
Loss = 4.5577e-01, PNorm = 78.1100, GNorm = 1.3482, lr_0 = 8.4663e-04
Loss = 5.1177e-01, PNorm = 78.2653, GNorm = 1.2807, lr_0 = 8.5038e-04
Loss = 4.2732e-01, PNorm = 78.4236, GNorm = 1.6560, lr_0 = 8.5413e-04
Loss = 4.2348e-01, PNorm = 78.5699, GNorm = 0.9588, lr_0 = 8.5788e-04
Loss = 4.5647e-01, PNorm = 78.7127, GNorm = 1.1479, lr_0 = 8.6163e-04
Loss = 4.4737e-01, PNorm = 78.8599, GNorm = 1.7683, lr_0 = 8.6538e-04
Loss = 4.4542e-01, PNorm = 79.0037, GNorm = 0.8430, lr_0 = 8.6913e-04
Loss = 5.2419e-01, PNorm = 79.1525, GNorm = 1.0954, lr_0 = 8.7288e-04
Loss = 4.0768e-01, PNorm = 79.3168, GNorm = 1.0003, lr_0 = 8.7663e-04
Loss = 4.0812e-01, PNorm = 79.4734, GNorm = 1.4906, lr_0 = 8.8038e-04
Loss = 4.4238e-01, PNorm = 79.6382, GNorm = 1.0736, lr_0 = 8.8413e-04
Loss = 4.6176e-01, PNorm = 79.8037, GNorm = 1.1167, lr_0 = 8.8788e-04
Loss = 3.8880e-01, PNorm = 79.9631, GNorm = 1.1086, lr_0 = 8.9163e-04
Loss = 4.3881e-01, PNorm = 80.1007, GNorm = 1.0766, lr_0 = 8.9538e-04
Loss = 4.1424e-01, PNorm = 80.2410, GNorm = 0.9778, lr_0 = 8.9913e-04
Loss = 4.5321e-01, PNorm = 80.3856, GNorm = 0.8884, lr_0 = 9.0288e-04
Loss = 4.1040e-01, PNorm = 80.5437, GNorm = 1.1253, lr_0 = 9.0663e-04
Loss = 4.3686e-01, PNorm = 80.6966, GNorm = 1.5108, lr_0 = 9.1038e-04
Loss = 4.6340e-01, PNorm = 80.8619, GNorm = 0.8290, lr_0 = 9.1413e-04
Loss = 4.4528e-01, PNorm = 81.0298, GNorm = 1.2406, lr_0 = 9.1788e-04
Loss = 4.6872e-01, PNorm = 81.1985, GNorm = 1.0016, lr_0 = 9.2163e-04
Loss = 4.3233e-01, PNorm = 81.3689, GNorm = 1.6644, lr_0 = 9.2538e-04
Loss = 4.2749e-01, PNorm = 81.5258, GNorm = 1.1517, lr_0 = 9.2913e-04
Loss = 4.5461e-01, PNorm = 81.6768, GNorm = 1.0503, lr_0 = 9.3288e-04
Loss = 4.3738e-01, PNorm = 81.8384, GNorm = 1.2446, lr_0 = 9.3663e-04
Loss = 3.6606e-01, PNorm = 81.9921, GNorm = 1.0211, lr_0 = 9.4038e-04
Loss = 4.1689e-01, PNorm = 82.1358, GNorm = 1.5516, lr_0 = 9.4413e-04
Loss = 4.0806e-01, PNorm = 82.2901, GNorm = 0.7680, lr_0 = 9.4788e-04
Loss = 4.3522e-01, PNorm = 82.4600, GNorm = 1.4346, lr_0 = 9.5163e-04
Loss = 4.3822e-01, PNorm = 82.6225, GNorm = 1.0867, lr_0 = 9.5538e-04
Loss = 4.5383e-01, PNorm = 82.8129, GNorm = 1.3152, lr_0 = 9.5913e-04
Loss = 4.2697e-01, PNorm = 82.9675, GNorm = 1.1999, lr_0 = 9.6288e-04
Loss = 3.9644e-01, PNorm = 83.1420, GNorm = 0.7556, lr_0 = 9.6663e-04
Loss = 3.5959e-01, PNorm = 83.2983, GNorm = 1.1933, lr_0 = 9.7038e-04
Loss = 4.6548e-01, PNorm = 83.4514, GNorm = 0.9375, lr_0 = 9.7413e-04
Loss = 4.1874e-01, PNorm = 83.6057, GNorm = 1.3008, lr_0 = 9.7788e-04
Loss = 4.1045e-01, PNorm = 83.7641, GNorm = 1.6033, lr_0 = 9.8163e-04
Loss = 4.8739e-01, PNorm = 83.9214, GNorm = 1.5982, lr_0 = 9.8537e-04
Loss = 5.3912e-01, PNorm = 84.0995, GNorm = 1.3545, lr_0 = 9.8912e-04
Loss = 3.6989e-01, PNorm = 84.2831, GNorm = 1.2136, lr_0 = 9.9288e-04
Loss = 4.3824e-01, PNorm = 84.4652, GNorm = 1.3258, lr_0 = 9.9663e-04
Loss = 4.2879e-01, PNorm = 84.6490, GNorm = 1.0972, lr_0 = 9.9993e-04
Validation mae = 0.127062
Epoch 2
Loss = 2.7555e-01, PNorm = 84.8202, GNorm = 0.7991, lr_0 = 9.9925e-04
Loss = 2.6797e-01, PNorm = 84.9686, GNorm = 0.9491, lr_0 = 9.9856e-04
Loss = 3.0240e-01, PNorm = 85.1051, GNorm = 0.8511, lr_0 = 9.9788e-04
Loss = 3.0420e-01, PNorm = 85.2565, GNorm = 0.8113, lr_0 = 9.9719e-04
Loss = 2.8349e-01, PNorm = 85.4013, GNorm = 1.2446, lr_0 = 9.9651e-04
Loss = 3.1196e-01, PNorm = 85.5565, GNorm = 1.1016, lr_0 = 9.9583e-04
Loss = 2.7391e-01, PNorm = 85.7014, GNorm = 1.0154, lr_0 = 9.9515e-04
Loss = 2.5700e-01, PNorm = 85.8519, GNorm = 2.1044, lr_0 = 9.9446e-04
Loss = 2.7031e-01, PNorm = 86.0037, GNorm = 1.0771, lr_0 = 9.9378e-04
Loss = 3.2739e-01, PNorm = 86.1741, GNorm = 1.2241, lr_0 = 9.9310e-04
Loss = 3.1421e-01, PNorm = 86.3462, GNorm = 1.1267, lr_0 = 9.9242e-04
Loss = 3.4785e-01, PNorm = 86.5140, GNorm = 1.3064, lr_0 = 9.9174e-04
Loss = 3.3811e-01, PNorm = 86.7131, GNorm = 0.8052, lr_0 = 9.9106e-04
Loss = 2.4680e-01, PNorm = 86.8961, GNorm = 1.1297, lr_0 = 9.9038e-04
Loss = 2.9566e-01, PNorm = 87.0658, GNorm = 1.1587, lr_0 = 9.8971e-04
Loss = 2.4897e-01, PNorm = 87.2237, GNorm = 1.1395, lr_0 = 9.8903e-04
Loss = 2.8298e-01, PNorm = 87.3787, GNorm = 0.6405, lr_0 = 9.8835e-04
Loss = 2.6230e-01, PNorm = 87.5469, GNorm = 1.2681, lr_0 = 9.8767e-04
Loss = 3.0027e-01, PNorm = 87.7092, GNorm = 0.9234, lr_0 = 9.8700e-04
Loss = 3.1839e-01, PNorm = 87.8894, GNorm = 1.3362, lr_0 = 9.8632e-04
Loss = 3.1943e-01, PNorm = 88.0576, GNorm = 0.9754, lr_0 = 9.8564e-04
Loss = 2.8082e-01, PNorm = 88.2418, GNorm = 1.0156, lr_0 = 9.8497e-04
Loss = 3.0690e-01, PNorm = 88.4125, GNorm = 1.4239, lr_0 = 9.8429e-04
Loss = 2.9882e-01, PNorm = 88.6177, GNorm = 0.9894, lr_0 = 9.8362e-04
Loss = 3.1247e-01, PNorm = 88.8082, GNorm = 1.3791, lr_0 = 9.8295e-04
Loss = 3.0748e-01, PNorm = 88.9993, GNorm = 1.2512, lr_0 = 9.8227e-04
Loss = 3.3704e-01, PNorm = 89.1888, GNorm = 1.1407, lr_0 = 9.8160e-04
Loss = 2.7517e-01, PNorm = 89.3791, GNorm = 1.0890, lr_0 = 9.8093e-04
Loss = 2.7825e-01, PNorm = 89.5672, GNorm = 0.9811, lr_0 = 9.8026e-04
Loss = 3.1183e-01, PNorm = 89.7465, GNorm = 0.8057, lr_0 = 9.7958e-04
Loss = 3.0986e-01, PNorm = 89.9173, GNorm = 0.7976, lr_0 = 9.7891e-04
Loss = 2.8035e-01, PNorm = 90.0876, GNorm = 0.8419, lr_0 = 9.7824e-04
Loss = 2.6511e-01, PNorm = 90.2533, GNorm = 1.4893, lr_0 = 9.7757e-04
Loss = 2.8982e-01, PNorm = 90.4130, GNorm = 1.5406, lr_0 = 9.7690e-04
Loss = 2.8640e-01, PNorm = 90.5851, GNorm = 1.1262, lr_0 = 9.7623e-04
Loss = 3.3251e-01, PNorm = 90.7548, GNorm = 0.9868, lr_0 = 9.7556e-04
Loss = 2.9061e-01, PNorm = 90.9183, GNorm = 1.0590, lr_0 = 9.7490e-04
Loss = 2.7789e-01, PNorm = 91.0842, GNorm = 0.9007, lr_0 = 9.7423e-04
Loss = 2.9491e-01, PNorm = 91.2619, GNorm = 0.7098, lr_0 = 9.7356e-04
Loss = 2.9525e-01, PNorm = 91.4357, GNorm = 0.9125, lr_0 = 9.7289e-04
Loss = 2.8406e-01, PNorm = 91.6257, GNorm = 1.5161, lr_0 = 9.7223e-04
Loss = 3.0288e-01, PNorm = 91.7862, GNorm = 0.9773, lr_0 = 9.7156e-04
Loss = 2.8527e-01, PNorm = 91.9792, GNorm = 1.0719, lr_0 = 9.7090e-04
Loss = 2.8031e-01, PNorm = 92.1498, GNorm = 1.0536, lr_0 = 9.7023e-04
Loss = 3.2485e-01, PNorm = 92.3283, GNorm = 2.0135, lr_0 = 9.6957e-04
Loss = 3.2842e-01, PNorm = 92.5080, GNorm = 1.1095, lr_0 = 9.6890e-04
Loss = 2.6888e-01, PNorm = 92.6971, GNorm = 1.0288, lr_0 = 9.6824e-04
Loss = 3.2182e-01, PNorm = 92.8521, GNorm = 1.2882, lr_0 = 9.6757e-04
Loss = 2.9581e-01, PNorm = 93.0124, GNorm = 1.0190, lr_0 = 9.6691e-04
Loss = 3.0709e-01, PNorm = 93.1738, GNorm = 0.9480, lr_0 = 9.6625e-04
Loss = 3.6078e-01, PNorm = 93.3386, GNorm = 1.2018, lr_0 = 9.6559e-04
Loss = 3.0892e-01, PNorm = 93.5049, GNorm = 1.0000, lr_0 = 9.6493e-04
Loss = 3.0051e-01, PNorm = 93.6740, GNorm = 1.1538, lr_0 = 9.6427e-04
Loss = 2.9499e-01, PNorm = 93.8381, GNorm = 0.9825, lr_0 = 9.6360e-04
Loss = 2.9900e-01, PNorm = 93.9900, GNorm = 1.3170, lr_0 = 9.6294e-04
Loss = 3.2182e-01, PNorm = 94.1487, GNorm = 1.2218, lr_0 = 9.6228e-04
Loss = 3.4276e-01, PNorm = 94.3168, GNorm = 1.6309, lr_0 = 9.6163e-04
Loss = 3.6745e-01, PNorm = 94.4963, GNorm = 1.1012, lr_0 = 9.6097e-04
Loss = 2.9616e-01, PNorm = 94.6555, GNorm = 1.1965, lr_0 = 9.6031e-04
Loss = 3.1071e-01, PNorm = 94.8299, GNorm = 0.9347, lr_0 = 9.5965e-04
Loss = 3.2992e-01, PNorm = 94.9972, GNorm = 1.1303, lr_0 = 9.5899e-04
Loss = 3.6119e-01, PNorm = 95.1732, GNorm = 0.9700, lr_0 = 9.5834e-04
Loss = 3.2297e-01, PNorm = 95.3550, GNorm = 1.2477, lr_0 = 9.5768e-04
Loss = 3.3131e-01, PNorm = 95.5009, GNorm = 1.0448, lr_0 = 9.5702e-04
Loss = 2.7648e-01, PNorm = 95.6715, GNorm = 1.5782, lr_0 = 9.5637e-04
Loss = 3.5146e-01, PNorm = 95.8241, GNorm = 1.5549, lr_0 = 9.5571e-04
Loss = 3.2410e-01, PNorm = 95.9758, GNorm = 0.9073, lr_0 = 9.5506e-04
Loss = 3.3350e-01, PNorm = 96.1484, GNorm = 0.9101, lr_0 = 9.5440e-04
Loss = 3.4607e-01, PNorm = 96.3146, GNorm = 1.3620, lr_0 = 9.5375e-04
Loss = 3.0644e-01, PNorm = 96.4942, GNorm = 0.9528, lr_0 = 9.5310e-04
Loss = 3.3210e-01, PNorm = 96.6641, GNorm = 1.0613, lr_0 = 9.5244e-04
Loss = 3.2443e-01, PNorm = 96.8316, GNorm = 1.1344, lr_0 = 9.5179e-04
Loss = 3.1742e-01, PNorm = 96.9993, GNorm = 0.8955, lr_0 = 9.5114e-04
Loss = 3.2802e-01, PNorm = 97.1648, GNorm = 0.8981, lr_0 = 9.5049e-04
Loss = 3.1317e-01, PNorm = 97.3367, GNorm = 1.0591, lr_0 = 9.4984e-04
Loss = 3.1684e-01, PNorm = 97.5061, GNorm = 1.0115, lr_0 = 9.4919e-04
Loss = 3.3684e-01, PNorm = 97.6827, GNorm = 1.4546, lr_0 = 9.4854e-04
Loss = 3.5576e-01, PNorm = 97.8465, GNorm = 0.9317, lr_0 = 9.4789e-04
Loss = 3.2005e-01, PNorm = 98.0119, GNorm = 1.3198, lr_0 = 9.4724e-04
Loss = 2.9792e-01, PNorm = 98.1696, GNorm = 0.9725, lr_0 = 9.4659e-04
Loss = 3.3836e-01, PNorm = 98.3281, GNorm = 2.3205, lr_0 = 9.4594e-04
Loss = 3.2630e-01, PNorm = 98.4845, GNorm = 0.8042, lr_0 = 9.4529e-04
Loss = 3.3371e-01, PNorm = 98.6463, GNorm = 1.1029, lr_0 = 9.4464e-04
Loss = 3.3816e-01, PNorm = 98.8032, GNorm = 1.5335, lr_0 = 9.4400e-04
Loss = 3.2110e-01, PNorm = 98.9574, GNorm = 0.8343, lr_0 = 9.4335e-04
Loss = 3.2644e-01, PNorm = 99.1164, GNorm = 1.4820, lr_0 = 9.4270e-04
Loss = 3.0277e-01, PNorm = 99.2631, GNorm = 0.9558, lr_0 = 9.4206e-04
Loss = 2.8055e-01, PNorm = 99.3943, GNorm = 0.8924, lr_0 = 9.4141e-04
Loss = 3.2551e-01, PNorm = 99.5302, GNorm = 1.3599, lr_0 = 9.4077e-04
Loss = 3.2747e-01, PNorm = 99.6753, GNorm = 0.9574, lr_0 = 9.4012e-04
Loss = 3.4809e-01, PNorm = 99.8292, GNorm = 1.2140, lr_0 = 9.3948e-04
Loss = 3.1193e-01, PNorm = 99.9831, GNorm = 0.6103, lr_0 = 9.3884e-04
Loss = 3.6359e-01, PNorm = 100.1322, GNorm = 1.2537, lr_0 = 9.3819e-04
Loss = 3.3028e-01, PNorm = 100.2756, GNorm = 1.2087, lr_0 = 9.3755e-04
Loss = 3.3800e-01, PNorm = 100.4248, GNorm = 0.8598, lr_0 = 9.3691e-04
Loss = 3.2397e-01, PNorm = 100.5697, GNorm = 1.6036, lr_0 = 9.3627e-04
Loss = 3.3538e-01, PNorm = 100.7123, GNorm = 0.7451, lr_0 = 9.3562e-04
Loss = 3.5298e-01, PNorm = 100.8594, GNorm = 1.2696, lr_0 = 9.3498e-04
Loss = 3.2201e-01, PNorm = 101.0128, GNorm = 0.9896, lr_0 = 9.3434e-04
Loss = 3.1936e-01, PNorm = 101.1580, GNorm = 1.0153, lr_0 = 9.3370e-04
Loss = 3.1663e-01, PNorm = 101.2925, GNorm = 1.5895, lr_0 = 9.3306e-04
Loss = 4.1011e-01, PNorm = 101.4477, GNorm = 0.9790, lr_0 = 9.3242e-04
Loss = 2.9232e-01, PNorm = 101.5994, GNorm = 0.8136, lr_0 = 9.3178e-04
Loss = 3.2833e-01, PNorm = 101.7327, GNorm = 0.8144, lr_0 = 9.3115e-04
Loss = 2.9429e-01, PNorm = 101.8649, GNorm = 0.7994, lr_0 = 9.3051e-04
Loss = 3.1045e-01, PNorm = 101.9943, GNorm = 1.0543, lr_0 = 9.2987e-04
Loss = 3.0129e-01, PNorm = 102.1240, GNorm = 0.9652, lr_0 = 9.2923e-04
Loss = 3.4611e-01, PNorm = 102.2749, GNorm = 0.8451, lr_0 = 9.2860e-04
Loss = 3.1235e-01, PNorm = 102.4166, GNorm = 0.8988, lr_0 = 9.2796e-04
Loss = 3.4003e-01, PNorm = 102.5562, GNorm = 1.2069, lr_0 = 9.2733e-04
Loss = 3.3630e-01, PNorm = 102.6980, GNorm = 0.9855, lr_0 = 9.2669e-04
Loss = 3.4134e-01, PNorm = 102.8373, GNorm = 1.6446, lr_0 = 9.2606e-04
Loss = 3.5639e-01, PNorm = 102.9815, GNorm = 0.9438, lr_0 = 9.2542e-04
Loss = 2.7791e-01, PNorm = 103.1162, GNorm = 0.9878, lr_0 = 9.2479e-04
Loss = 3.2088e-01, PNorm = 103.2413, GNorm = 1.0693, lr_0 = 9.2415e-04
Loss = 3.1380e-01, PNorm = 103.3590, GNorm = 0.9172, lr_0 = 9.2352e-04
Loss = 3.2334e-01, PNorm = 103.4793, GNorm = 0.7484, lr_0 = 9.2289e-04
Loss = 3.3365e-01, PNorm = 103.6045, GNorm = 1.0462, lr_0 = 9.2226e-04
Loss = 3.0971e-01, PNorm = 103.7226, GNorm = 1.2038, lr_0 = 9.2162e-04
Loss = 3.5539e-01, PNorm = 103.8601, GNorm = 1.0326, lr_0 = 9.2099e-04
Validation mae = 0.126245
Epoch 3
Loss = 2.2607e-01, PNorm = 103.9830, GNorm = 0.6888, lr_0 = 9.2036e-04
Loss = 1.8489e-01, PNorm = 104.1000, GNorm = 1.0587, lr_0 = 9.1973e-04
Loss = 1.6394e-01, PNorm = 104.2053, GNorm = 0.8740, lr_0 = 9.1910e-04
Loss = 2.0566e-01, PNorm = 104.3076, GNorm = 0.7994, lr_0 = 9.1847e-04
Loss = 1.9078e-01, PNorm = 104.4060, GNorm = 1.1172, lr_0 = 9.1784e-04
Loss = 2.1090e-01, PNorm = 104.5013, GNorm = 0.6429, lr_0 = 9.1721e-04
Loss = 1.5565e-01, PNorm = 104.6055, GNorm = 0.8021, lr_0 = 9.1658e-04
Loss = 1.7203e-01, PNorm = 104.7056, GNorm = 0.6803, lr_0 = 9.1596e-04
Loss = 1.5826e-01, PNorm = 104.7941, GNorm = 0.6136, lr_0 = 9.1533e-04
Loss = 1.4067e-01, PNorm = 104.8875, GNorm = 1.5595, lr_0 = 9.1470e-04
Loss = 1.7308e-01, PNorm = 104.9741, GNorm = 0.9414, lr_0 = 9.1408e-04
Loss = 1.6718e-01, PNorm = 105.0718, GNorm = 0.5397, lr_0 = 9.1345e-04
Loss = 1.8137e-01, PNorm = 105.1892, GNorm = 1.0059, lr_0 = 9.1282e-04
Loss = 1.7655e-01, PNorm = 105.2868, GNorm = 0.5999, lr_0 = 9.1220e-04
Loss = 1.8380e-01, PNorm = 105.3816, GNorm = 1.1291, lr_0 = 9.1157e-04
Loss = 2.0647e-01, PNorm = 105.4967, GNorm = 0.8112, lr_0 = 9.1095e-04
Loss = 1.9815e-01, PNorm = 105.6053, GNorm = 1.2615, lr_0 = 9.1032e-04
Loss = 1.7012e-01, PNorm = 105.7176, GNorm = 0.6208, lr_0 = 9.0970e-04
Loss = 1.6005e-01, PNorm = 105.8168, GNorm = 1.2979, lr_0 = 9.0908e-04
Loss = 1.8323e-01, PNorm = 105.9140, GNorm = 0.8702, lr_0 = 9.0846e-04
Loss = 2.0059e-01, PNorm = 106.0213, GNorm = 0.7599, lr_0 = 9.0783e-04
Loss = 1.8034e-01, PNorm = 106.1484, GNorm = 0.8304, lr_0 = 9.0721e-04
Loss = 1.6705e-01, PNorm = 106.2573, GNorm = 0.6597, lr_0 = 9.0659e-04
Loss = 1.8969e-01, PNorm = 106.3688, GNorm = 0.6960, lr_0 = 9.0597e-04
Loss = 1.9900e-01, PNorm = 106.4828, GNorm = 1.0112, lr_0 = 9.0535e-04
Loss = 1.5976e-01, PNorm = 106.5941, GNorm = 0.8552, lr_0 = 9.0473e-04
Loss = 1.9639e-01, PNorm = 106.7164, GNorm = 0.8406, lr_0 = 9.0411e-04
Loss = 1.7901e-01, PNorm = 106.8353, GNorm = 0.6883, lr_0 = 9.0349e-04
Loss = 1.7963e-01, PNorm = 106.9578, GNorm = 0.8150, lr_0 = 9.0287e-04
Loss = 1.9764e-01, PNorm = 107.0804, GNorm = 0.8794, lr_0 = 9.0225e-04
Loss = 1.8366e-01, PNorm = 107.2070, GNorm = 1.6558, lr_0 = 9.0163e-04
Loss = 1.8722e-01, PNorm = 107.3338, GNorm = 0.8693, lr_0 = 9.0102e-04
Loss = 2.0031e-01, PNorm = 107.4571, GNorm = 0.6767, lr_0 = 9.0040e-04
Loss = 2.1032e-01, PNorm = 107.5841, GNorm = 0.7127, lr_0 = 8.9978e-04
Loss = 1.9845e-01, PNorm = 107.7010, GNorm = 1.2368, lr_0 = 8.9916e-04
Loss = 1.8723e-01, PNorm = 107.8389, GNorm = 0.7940, lr_0 = 8.9855e-04
Loss = 2.0029e-01, PNorm = 107.9683, GNorm = 1.3519, lr_0 = 8.9793e-04
Loss = 2.0537e-01, PNorm = 108.0845, GNorm = 1.0412, lr_0 = 8.9732e-04
Loss = 1.7824e-01, PNorm = 108.2104, GNorm = 0.9141, lr_0 = 8.9670e-04
Loss = 1.7323e-01, PNorm = 108.3223, GNorm = 0.6908, lr_0 = 8.9609e-04
Loss = 2.0360e-01, PNorm = 108.4461, GNorm = 0.8507, lr_0 = 8.9548e-04
Loss = 1.6576e-01, PNorm = 108.5673, GNorm = 0.7642, lr_0 = 8.9486e-04
Loss = 1.7669e-01, PNorm = 108.6691, GNorm = 1.2129, lr_0 = 8.9425e-04
Loss = 2.2874e-01, PNorm = 108.7879, GNorm = 0.5358, lr_0 = 8.9364e-04
Loss = 1.8959e-01, PNorm = 108.9078, GNorm = 1.2727, lr_0 = 8.9302e-04
Loss = 2.1408e-01, PNorm = 109.0258, GNorm = 0.7345, lr_0 = 8.9241e-04
Loss = 1.9206e-01, PNorm = 109.1544, GNorm = 1.2195, lr_0 = 8.9180e-04
Loss = 2.1632e-01, PNorm = 109.2798, GNorm = 0.9349, lr_0 = 8.9119e-04
Loss = 1.9557e-01, PNorm = 109.4035, GNorm = 1.0162, lr_0 = 8.9058e-04
Loss = 1.8323e-01, PNorm = 109.5209, GNorm = 0.8272, lr_0 = 8.8997e-04
Loss = 1.9893e-01, PNorm = 109.6299, GNorm = 0.8025, lr_0 = 8.8936e-04
Loss = 2.2541e-01, PNorm = 109.7595, GNorm = 0.9624, lr_0 = 8.8875e-04
Loss = 1.8800e-01, PNorm = 109.8927, GNorm = 1.1673, lr_0 = 8.8814e-04
Loss = 2.2059e-01, PNorm = 110.0036, GNorm = 0.8779, lr_0 = 8.8753e-04
Loss = 1.9059e-01, PNorm = 110.1190, GNorm = 0.8139, lr_0 = 8.8693e-04
Loss = 1.7728e-01, PNorm = 110.2281, GNorm = 0.8498, lr_0 = 8.8632e-04
Loss = 2.1361e-01, PNorm = 110.3531, GNorm = 0.6946, lr_0 = 8.8571e-04
Loss = 1.8825e-01, PNorm = 110.4659, GNorm = 0.8009, lr_0 = 8.8510e-04
Loss = 2.0321e-01, PNorm = 110.5803, GNorm = 0.7920, lr_0 = 8.8450e-04
Loss = 1.7716e-01, PNorm = 110.6979, GNorm = 1.3442, lr_0 = 8.8389e-04
Loss = 1.7153e-01, PNorm = 110.8240, GNorm = 0.8102, lr_0 = 8.8329e-04
Loss = 1.8023e-01, PNorm = 110.9337, GNorm = 0.8938, lr_0 = 8.8268e-04
Loss = 2.2307e-01, PNorm = 111.0575, GNorm = 0.8837, lr_0 = 8.8208e-04
Loss = 2.1061e-01, PNorm = 111.1871, GNorm = 1.0680, lr_0 = 8.8147e-04
Loss = 2.1078e-01, PNorm = 111.3007, GNorm = 0.6847, lr_0 = 8.8087e-04
Loss = 2.2933e-01, PNorm = 111.4256, GNorm = 0.8321, lr_0 = 8.8026e-04
Loss = 1.9763e-01, PNorm = 111.5398, GNorm = 1.5903, lr_0 = 8.7966e-04
Loss = 1.9323e-01, PNorm = 111.6585, GNorm = 0.8768, lr_0 = 8.7906e-04
Loss = 1.9897e-01, PNorm = 111.7705, GNorm = 0.7172, lr_0 = 8.7846e-04
Loss = 1.9869e-01, PNorm = 111.8801, GNorm = 0.7513, lr_0 = 8.7785e-04
Loss = 1.8869e-01, PNorm = 111.9945, GNorm = 0.6499, lr_0 = 8.7725e-04
Loss = 1.8810e-01, PNorm = 112.1229, GNorm = 1.1895, lr_0 = 8.7665e-04
Loss = 2.0767e-01, PNorm = 112.2311, GNorm = 1.0049, lr_0 = 8.7605e-04
Loss = 2.2240e-01, PNorm = 112.3485, GNorm = 0.9263, lr_0 = 8.7545e-04
Loss = 1.9642e-01, PNorm = 112.4653, GNorm = 0.8358, lr_0 = 8.7485e-04
Loss = 1.9998e-01, PNorm = 112.5787, GNorm = 1.0453, lr_0 = 8.7425e-04
Loss = 1.7832e-01, PNorm = 112.6915, GNorm = 0.7602, lr_0 = 8.7365e-04
Loss = 2.4639e-01, PNorm = 112.8047, GNorm = 1.9184, lr_0 = 8.7306e-04
Loss = 2.1417e-01, PNorm = 112.9328, GNorm = 0.8825, lr_0 = 8.7246e-04
Loss = 1.9062e-01, PNorm = 113.0558, GNorm = 1.0263, lr_0 = 8.7186e-04
Loss = 1.9913e-01, PNorm = 113.1760, GNorm = 0.8786, lr_0 = 8.7126e-04
Loss = 2.0945e-01, PNorm = 113.3084, GNorm = 0.6614, lr_0 = 8.7067e-04
Loss = 2.1214e-01, PNorm = 113.4251, GNorm = 1.0897, lr_0 = 8.7007e-04
Loss = 2.1068e-01, PNorm = 113.5416, GNorm = 0.8570, lr_0 = 8.6947e-04
Loss = 2.3125e-01, PNorm = 113.6691, GNorm = 0.7682, lr_0 = 8.6888e-04
Loss = 1.9917e-01, PNorm = 113.7866, GNorm = 0.9861, lr_0 = 8.6828e-04
Loss = 2.4009e-01, PNorm = 113.9127, GNorm = 1.2015, lr_0 = 8.6769e-04
Loss = 2.2411e-01, PNorm = 114.0532, GNorm = 0.6624, lr_0 = 8.6709e-04
Loss = 2.0903e-01, PNorm = 114.1823, GNorm = 0.7499, lr_0 = 8.6650e-04
Loss = 1.6374e-01, PNorm = 114.3016, GNorm = 0.7575, lr_0 = 8.6590e-04
Loss = 2.3531e-01, PNorm = 114.4220, GNorm = 0.7345, lr_0 = 8.6531e-04
Loss = 2.2593e-01, PNorm = 114.5630, GNorm = 0.9815, lr_0 = 8.6472e-04
Loss = 2.4017e-01, PNorm = 114.6866, GNorm = 0.7787, lr_0 = 8.6413e-04
Loss = 2.3051e-01, PNorm = 114.8255, GNorm = 0.7537, lr_0 = 8.6353e-04
Loss = 2.2158e-01, PNorm = 114.9546, GNorm = 1.2810, lr_0 = 8.6294e-04
Loss = 2.3767e-01, PNorm = 115.0900, GNorm = 0.9246, lr_0 = 8.6235e-04
Loss = 1.8758e-01, PNorm = 115.2254, GNorm = 1.5825, lr_0 = 8.6176e-04
Loss = 2.0409e-01, PNorm = 115.3461, GNorm = 0.6333, lr_0 = 8.6117e-04
Loss = 2.1408e-01, PNorm = 115.4815, GNorm = 0.7783, lr_0 = 8.6058e-04
Loss = 2.4362e-01, PNorm = 115.6027, GNorm = 0.7484, lr_0 = 8.5999e-04
Loss = 2.3615e-01, PNorm = 115.7394, GNorm = 0.8930, lr_0 = 8.5940e-04
Loss = 1.5270e-01, PNorm = 115.8673, GNorm = 0.8497, lr_0 = 8.5881e-04
Loss = 2.5670e-01, PNorm = 115.9963, GNorm = 1.1005, lr_0 = 8.5823e-04
Loss = 2.5877e-01, PNorm = 116.1114, GNorm = 1.1816, lr_0 = 8.5764e-04
Loss = 2.2358e-01, PNorm = 116.2518, GNorm = 0.7645, lr_0 = 8.5705e-04
Loss = 2.5639e-01, PNorm = 116.3843, GNorm = 1.0418, lr_0 = 8.5646e-04
Loss = 2.2945e-01, PNorm = 116.5149, GNorm = 0.8233, lr_0 = 8.5588e-04
Loss = 2.6436e-01, PNorm = 116.6555, GNorm = 1.7363, lr_0 = 8.5529e-04
Loss = 2.2700e-01, PNorm = 116.7868, GNorm = 0.8627, lr_0 = 8.5470e-04
Loss = 2.1524e-01, PNorm = 116.9202, GNorm = 1.2813, lr_0 = 8.5412e-04
Loss = 2.1916e-01, PNorm = 117.0536, GNorm = 0.9726, lr_0 = 8.5353e-04
Loss = 2.2662e-01, PNorm = 117.1843, GNorm = 1.3315, lr_0 = 8.5295e-04
Loss = 2.4746e-01, PNorm = 117.3014, GNorm = 1.5102, lr_0 = 8.5236e-04
Loss = 2.5678e-01, PNorm = 117.4244, GNorm = 1.7216, lr_0 = 8.5178e-04
Loss = 2.0029e-01, PNorm = 117.5540, GNorm = 1.0392, lr_0 = 8.5120e-04
Loss = 2.0340e-01, PNorm = 117.6743, GNorm = 0.8832, lr_0 = 8.5061e-04
Loss = 2.4902e-01, PNorm = 117.7916, GNorm = 1.2612, lr_0 = 8.5003e-04
Loss = 2.2808e-01, PNorm = 117.9130, GNorm = 0.6900, lr_0 = 8.4945e-04
Loss = 2.1823e-01, PNorm = 118.0390, GNorm = 0.7153, lr_0 = 8.4887e-04
Loss = 2.3431e-01, PNorm = 118.1670, GNorm = 0.5874, lr_0 = 8.4828e-04
Validation mae = 0.124829
Epoch 4
Loss = 1.3005e-01, PNorm = 118.2899, GNorm = 1.1560, lr_0 = 8.4770e-04
Loss = 1.3196e-01, PNorm = 118.3846, GNorm = 0.9596, lr_0 = 8.4712e-04
Loss = 1.2473e-01, PNorm = 118.4689, GNorm = 0.8418, lr_0 = 8.4654e-04
Loss = 1.3588e-01, PNorm = 118.5578, GNorm = 0.6479, lr_0 = 8.4596e-04
Loss = 1.3162e-01, PNorm = 118.6405, GNorm = 0.8519, lr_0 = 8.4538e-04
Loss = 1.2495e-01, PNorm = 118.7255, GNorm = 0.6485, lr_0 = 8.4480e-04
Loss = 1.1831e-01, PNorm = 118.8103, GNorm = 0.8174, lr_0 = 8.4423e-04
Loss = 1.3110e-01, PNorm = 118.8977, GNorm = 0.7102, lr_0 = 8.4365e-04
Loss = 1.0012e-01, PNorm = 118.9706, GNorm = 0.5103, lr_0 = 8.4307e-04
Loss = 1.3229e-01, PNorm = 119.0398, GNorm = 0.7740, lr_0 = 8.4249e-04
Loss = 1.0510e-01, PNorm = 119.1146, GNorm = 0.7872, lr_0 = 8.4191e-04
Loss = 1.0877e-01, PNorm = 119.1865, GNorm = 0.4880, lr_0 = 8.4134e-04
Loss = 1.2361e-01, PNorm = 119.2610, GNorm = 0.8412, lr_0 = 8.4076e-04
Loss = 1.3148e-01, PNorm = 119.3416, GNorm = 0.6297, lr_0 = 8.4019e-04
Loss = 1.0426e-01, PNorm = 119.4109, GNorm = 1.1526, lr_0 = 8.3961e-04
Loss = 1.1224e-01, PNorm = 119.4786, GNorm = 0.7060, lr_0 = 8.3903e-04
Loss = 1.0176e-01, PNorm = 119.5398, GNorm = 0.6024, lr_0 = 8.3846e-04
Loss = 1.1522e-01, PNorm = 119.6129, GNorm = 0.7158, lr_0 = 8.3789e-04
Loss = 1.2695e-01, PNorm = 119.6788, GNorm = 0.7041, lr_0 = 8.3731e-04
Loss = 1.3740e-01, PNorm = 119.7647, GNorm = 0.8061, lr_0 = 8.3674e-04
Loss = 1.1766e-01, PNorm = 119.8397, GNorm = 0.7149, lr_0 = 8.3616e-04
Loss = 1.1951e-01, PNorm = 119.9280, GNorm = 0.6114, lr_0 = 8.3559e-04
Loss = 1.1721e-01, PNorm = 120.0069, GNorm = 0.5175, lr_0 = 8.3502e-04
Loss = 1.0680e-01, PNorm = 120.0867, GNorm = 0.6595, lr_0 = 8.3445e-04
Loss = 1.2028e-01, PNorm = 120.1617, GNorm = 0.5772, lr_0 = 8.3388e-04
Loss = 1.1741e-01, PNorm = 120.2435, GNorm = 0.5901, lr_0 = 8.3330e-04
Loss = 1.1895e-01, PNorm = 120.3274, GNorm = 1.0592, lr_0 = 8.3273e-04
Loss = 1.1961e-01, PNorm = 120.3994, GNorm = 0.6537, lr_0 = 8.3216e-04
Loss = 1.1135e-01, PNorm = 120.4940, GNorm = 1.1477, lr_0 = 8.3159e-04
Loss = 1.1918e-01, PNorm = 120.5710, GNorm = 0.6629, lr_0 = 8.3102e-04
Loss = 1.2649e-01, PNorm = 120.6547, GNorm = 0.4513, lr_0 = 8.3045e-04
Loss = 1.1935e-01, PNorm = 120.7447, GNorm = 0.6722, lr_0 = 8.2988e-04
Loss = 1.1855e-01, PNorm = 120.8258, GNorm = 0.5148, lr_0 = 8.2932e-04
Loss = 1.0646e-01, PNorm = 120.9111, GNorm = 0.5412, lr_0 = 8.2875e-04
Loss = 1.1886e-01, PNorm = 121.0020, GNorm = 0.6254, lr_0 = 8.2818e-04
Loss = 1.1092e-01, PNorm = 121.0883, GNorm = 0.7522, lr_0 = 8.2761e-04
Loss = 1.1236e-01, PNorm = 121.1640, GNorm = 1.5149, lr_0 = 8.2705e-04
Loss = 1.1548e-01, PNorm = 121.2478, GNorm = 0.5616, lr_0 = 8.2648e-04
Loss = 1.1441e-01, PNorm = 121.3350, GNorm = 1.1546, lr_0 = 8.2591e-04
Loss = 1.3977e-01, PNorm = 121.4103, GNorm = 0.6059, lr_0 = 8.2535e-04
Loss = 1.1557e-01, PNorm = 121.4934, GNorm = 0.5249, lr_0 = 8.2478e-04
Loss = 1.1795e-01, PNorm = 121.5777, GNorm = 0.9511, lr_0 = 8.2422e-04
Loss = 1.3049e-01, PNorm = 121.6655, GNorm = 1.0993, lr_0 = 8.2365e-04
Loss = 1.0923e-01, PNorm = 121.7389, GNorm = 0.9344, lr_0 = 8.2309e-04
Loss = 1.1192e-01, PNorm = 121.8304, GNorm = 0.9346, lr_0 = 8.2252e-04
Loss = 1.2784e-01, PNorm = 121.9164, GNorm = 0.6316, lr_0 = 8.2196e-04
Loss = 1.3133e-01, PNorm = 122.0073, GNorm = 0.7206, lr_0 = 8.2140e-04
Loss = 1.2338e-01, PNorm = 122.0975, GNorm = 0.5790, lr_0 = 8.2084e-04
Loss = 1.4923e-01, PNorm = 122.1856, GNorm = 1.1611, lr_0 = 8.2027e-04
Loss = 1.1744e-01, PNorm = 122.2783, GNorm = 1.3129, lr_0 = 8.1971e-04
Loss = 1.3192e-01, PNorm = 122.3714, GNorm = 0.8019, lr_0 = 8.1915e-04
Loss = 1.2328e-01, PNorm = 122.4635, GNorm = 1.0314, lr_0 = 8.1859e-04
Loss = 1.2726e-01, PNorm = 122.5495, GNorm = 0.6192, lr_0 = 8.1803e-04
Loss = 1.1754e-01, PNorm = 122.6370, GNorm = 0.7974, lr_0 = 8.1747e-04
Loss = 1.1602e-01, PNorm = 122.7203, GNorm = 0.7905, lr_0 = 8.1691e-04
Loss = 1.4978e-01, PNorm = 122.7856, GNorm = 0.9411, lr_0 = 8.1635e-04
Loss = 1.2670e-01, PNorm = 122.8805, GNorm = 1.0804, lr_0 = 8.1579e-04
Loss = 1.1531e-01, PNorm = 122.9752, GNorm = 0.7989, lr_0 = 8.1523e-04
Loss = 1.2532e-01, PNorm = 123.0611, GNorm = 1.0276, lr_0 = 8.1467e-04
Loss = 1.2796e-01, PNorm = 123.1557, GNorm = 0.7227, lr_0 = 8.1411e-04
Loss = 1.3472e-01, PNorm = 123.2392, GNorm = 0.7311, lr_0 = 8.1355e-04
Loss = 1.4094e-01, PNorm = 123.3357, GNorm = 1.3679, lr_0 = 8.1300e-04
Loss = 1.3789e-01, PNorm = 123.4288, GNorm = 0.6286, lr_0 = 8.1244e-04
Loss = 1.3475e-01, PNorm = 123.5205, GNorm = 0.8701, lr_0 = 8.1188e-04
Loss = 1.4508e-01, PNorm = 123.6100, GNorm = 1.3557, lr_0 = 8.1133e-04
Loss = 1.1561e-01, PNorm = 123.6990, GNorm = 0.9428, lr_0 = 8.1077e-04
Loss = 1.6497e-01, PNorm = 123.7916, GNorm = 1.3594, lr_0 = 8.1022e-04
Loss = 1.4023e-01, PNorm = 123.8834, GNorm = 0.5274, lr_0 = 8.0966e-04
Loss = 1.5136e-01, PNorm = 123.9784, GNorm = 1.0266, lr_0 = 8.0911e-04
Loss = 1.3721e-01, PNorm = 124.0733, GNorm = 1.1630, lr_0 = 8.0855e-04
Loss = 1.3249e-01, PNorm = 124.1624, GNorm = 0.8413, lr_0 = 8.0800e-04
Loss = 1.3566e-01, PNorm = 124.2565, GNorm = 0.6979, lr_0 = 8.0745e-04
Loss = 1.3293e-01, PNorm = 124.3648, GNorm = 0.9573, lr_0 = 8.0689e-04
Loss = 1.4170e-01, PNorm = 124.4585, GNorm = 0.8347, lr_0 = 8.0634e-04
Loss = 1.8161e-01, PNorm = 124.5698, GNorm = 0.5150, lr_0 = 8.0579e-04
Loss = 1.3385e-01, PNorm = 124.6873, GNorm = 0.6099, lr_0 = 8.0523e-04
Loss = 1.3404e-01, PNorm = 124.7856, GNorm = 0.8727, lr_0 = 8.0468e-04
Loss = 1.6954e-01, PNorm = 124.8870, GNorm = 1.3995, lr_0 = 8.0413e-04
Loss = 1.4244e-01, PNorm = 124.9815, GNorm = 0.6112, lr_0 = 8.0358e-04
Loss = 1.4912e-01, PNorm = 125.0903, GNorm = 0.7168, lr_0 = 8.0303e-04
Loss = 1.2841e-01, PNorm = 125.2037, GNorm = 1.0358, lr_0 = 8.0248e-04
Loss = 1.3324e-01, PNorm = 125.3023, GNorm = 0.7610, lr_0 = 8.0193e-04
Loss = 1.4817e-01, PNorm = 125.4051, GNorm = 1.2530, lr_0 = 8.0138e-04
Loss = 1.4425e-01, PNorm = 125.5095, GNorm = 1.0592, lr_0 = 8.0083e-04
Loss = 1.7582e-01, PNorm = 125.6050, GNorm = 0.9842, lr_0 = 8.0028e-04
Loss = 1.7979e-01, PNorm = 125.7107, GNorm = 0.7099, lr_0 = 7.9974e-04
Loss = 1.2537e-01, PNorm = 125.8150, GNorm = 0.5644, lr_0 = 7.9919e-04
Loss = 1.3147e-01, PNorm = 125.9175, GNorm = 0.6293, lr_0 = 7.9864e-04
Loss = 1.1936e-01, PNorm = 126.0155, GNorm = 1.2912, lr_0 = 7.9809e-04
Loss = 1.9218e-01, PNorm = 126.1032, GNorm = 2.1520, lr_0 = 7.9755e-04
Loss = 1.3802e-01, PNorm = 126.1881, GNorm = 1.3507, lr_0 = 7.9700e-04
Loss = 1.6446e-01, PNorm = 126.2990, GNorm = 0.6852, lr_0 = 7.9645e-04
Loss = 1.6855e-01, PNorm = 126.3922, GNorm = 0.8246, lr_0 = 7.9591e-04
Loss = 1.4647e-01, PNorm = 126.4944, GNorm = 0.7838, lr_0 = 7.9536e-04
Loss = 1.3971e-01, PNorm = 126.5972, GNorm = 0.5915, lr_0 = 7.9482e-04
Loss = 1.4745e-01, PNorm = 126.6964, GNorm = 0.9864, lr_0 = 7.9427e-04
Loss = 1.3952e-01, PNorm = 126.7979, GNorm = 0.5653, lr_0 = 7.9373e-04
Loss = 1.3509e-01, PNorm = 126.8967, GNorm = 0.9115, lr_0 = 7.9319e-04
Loss = 1.5236e-01, PNorm = 126.9898, GNorm = 0.6801, lr_0 = 7.9264e-04
Loss = 1.5419e-01, PNorm = 127.0860, GNorm = 0.9797, lr_0 = 7.9210e-04
Loss = 1.4445e-01, PNorm = 127.1852, GNorm = 0.6537, lr_0 = 7.9156e-04
Loss = 1.4932e-01, PNorm = 127.2880, GNorm = 0.5583, lr_0 = 7.9101e-04
Loss = 1.4643e-01, PNorm = 127.3866, GNorm = 0.5811, lr_0 = 7.9047e-04
Loss = 1.2692e-01, PNorm = 127.4874, GNorm = 0.5709, lr_0 = 7.8993e-04
Loss = 1.3426e-01, PNorm = 127.5918, GNorm = 0.7845, lr_0 = 7.8939e-04
Loss = 1.5652e-01, PNorm = 127.7060, GNorm = 0.7947, lr_0 = 7.8885e-04
Loss = 1.2321e-01, PNorm = 127.8195, GNorm = 0.7937, lr_0 = 7.8831e-04
Loss = 1.3804e-01, PNorm = 127.9278, GNorm = 0.9133, lr_0 = 7.8777e-04
Loss = 1.4263e-01, PNorm = 128.0303, GNorm = 0.7712, lr_0 = 7.8723e-04
Loss = 1.4822e-01, PNorm = 128.1283, GNorm = 0.7679, lr_0 = 7.8669e-04
Loss = 1.4204e-01, PNorm = 128.2353, GNorm = 1.3996, lr_0 = 7.8615e-04
Loss = 1.4441e-01, PNorm = 128.3339, GNorm = 0.7065, lr_0 = 7.8561e-04
Loss = 1.5154e-01, PNorm = 128.4442, GNorm = 1.6294, lr_0 = 7.8507e-04
Loss = 1.4902e-01, PNorm = 128.5457, GNorm = 0.6956, lr_0 = 7.8454e-04
Loss = 1.4635e-01, PNorm = 128.6584, GNorm = 1.0856, lr_0 = 7.8400e-04
Loss = 1.2706e-01, PNorm = 128.7596, GNorm = 0.6505, lr_0 = 7.8346e-04
Loss = 1.4070e-01, PNorm = 128.8549, GNorm = 0.7385, lr_0 = 7.8293e-04
Loss = 1.4761e-01, PNorm = 128.9583, GNorm = 1.0752, lr_0 = 7.8239e-04
Loss = 1.3829e-01, PNorm = 129.0525, GNorm = 0.7355, lr_0 = 7.8185e-04
Loss = 1.4013e-01, PNorm = 129.1489, GNorm = 1.1932, lr_0 = 7.8132e-04
Validation mae = 0.124114
Epoch 5
Loss = 8.4058e-02, PNorm = 129.2344, GNorm = 0.7902, lr_0 = 7.8078e-04
Loss = 9.4097e-02, PNorm = 129.2939, GNorm = 1.2756, lr_0 = 7.8025e-04
Loss = 8.9777e-02, PNorm = 129.3620, GNorm = 0.5359, lr_0 = 7.7971e-04
Loss = 9.6054e-02, PNorm = 129.4359, GNorm = 0.7559, lr_0 = 7.7918e-04
Loss = 7.7646e-02, PNorm = 129.4917, GNorm = 0.5166, lr_0 = 7.7864e-04
Loss = 8.6604e-02, PNorm = 129.5552, GNorm = 1.0524, lr_0 = 7.7811e-04
Loss = 7.0427e-02, PNorm = 129.6168, GNorm = 0.3930, lr_0 = 7.7758e-04
Loss = 7.5335e-02, PNorm = 129.6658, GNorm = 0.4964, lr_0 = 7.7705e-04
Loss = 8.2282e-02, PNorm = 129.7200, GNorm = 0.5200, lr_0 = 7.7651e-04
Loss = 8.6668e-02, PNorm = 129.7801, GNorm = 0.6966, lr_0 = 7.7598e-04
Loss = 7.9279e-02, PNorm = 129.8413, GNorm = 0.5935, lr_0 = 7.7545e-04
Loss = 9.0491e-02, PNorm = 129.8955, GNorm = 1.4546, lr_0 = 7.7492e-04
Loss = 8.0932e-02, PNorm = 129.9536, GNorm = 0.5129, lr_0 = 7.7439e-04
Loss = 1.0440e-01, PNorm = 130.0101, GNorm = 0.9990, lr_0 = 7.7386e-04
Loss = 7.5840e-02, PNorm = 130.0703, GNorm = 0.3743, lr_0 = 7.7333e-04
Loss = 8.4697e-02, PNorm = 130.1252, GNorm = 0.9432, lr_0 = 7.7280e-04
Loss = 7.0737e-02, PNorm = 130.1793, GNorm = 0.4568, lr_0 = 7.7227e-04
Loss = 7.2152e-02, PNorm = 130.2350, GNorm = 0.5510, lr_0 = 7.7174e-04
Loss = 7.2011e-02, PNorm = 130.2872, GNorm = 0.6132, lr_0 = 7.7121e-04
Loss = 7.6321e-02, PNorm = 130.3468, GNorm = 0.5136, lr_0 = 7.7068e-04
Loss = 8.1781e-02, PNorm = 130.4084, GNorm = 0.6408, lr_0 = 7.7015e-04
Loss = 6.8886e-02, PNorm = 130.4705, GNorm = 0.5573, lr_0 = 7.6963e-04
Loss = 8.1704e-02, PNorm = 130.5274, GNorm = 0.6331, lr_0 = 7.6910e-04
Loss = 6.8148e-02, PNorm = 130.5881, GNorm = 0.5070, lr_0 = 7.6857e-04
Loss = 7.3898e-02, PNorm = 130.6411, GNorm = 0.6330, lr_0 = 7.6805e-04
Loss = 1.0137e-01, PNorm = 130.7095, GNorm = 0.6782, lr_0 = 7.6752e-04
Loss = 7.8155e-02, PNorm = 130.7793, GNorm = 0.6568, lr_0 = 7.6699e-04
Loss = 8.5279e-02, PNorm = 130.8509, GNorm = 0.9366, lr_0 = 7.6647e-04
Loss = 7.3514e-02, PNorm = 130.9102, GNorm = 0.4747, lr_0 = 7.6594e-04
Loss = 8.1149e-02, PNorm = 130.9717, GNorm = 0.9721, lr_0 = 7.6542e-04
Loss = 8.2916e-02, PNorm = 131.0427, GNorm = 0.7640, lr_0 = 7.6489e-04
Loss = 1.0193e-01, PNorm = 131.1028, GNorm = 0.6507, lr_0 = 7.6437e-04
Loss = 8.1404e-02, PNorm = 131.1704, GNorm = 0.6717, lr_0 = 7.6385e-04
Loss = 6.9236e-02, PNorm = 131.2348, GNorm = 0.6031, lr_0 = 7.6332e-04
Loss = 9.6463e-02, PNorm = 131.3041, GNorm = 1.1598, lr_0 = 7.6280e-04
Loss = 9.4430e-02, PNorm = 131.3642, GNorm = 0.7754, lr_0 = 7.6228e-04
Loss = 8.2932e-02, PNorm = 131.4382, GNorm = 0.9307, lr_0 = 7.6176e-04
Loss = 8.8504e-02, PNorm = 131.5103, GNorm = 0.5022, lr_0 = 7.6123e-04
Loss = 8.3918e-02, PNorm = 131.5822, GNorm = 0.6163, lr_0 = 7.6071e-04
Loss = 9.5688e-02, PNorm = 131.6498, GNorm = 0.5566, lr_0 = 7.6019e-04
Loss = 7.7966e-02, PNorm = 131.7290, GNorm = 0.7404, lr_0 = 7.5967e-04
Loss = 8.4651e-02, PNorm = 131.7970, GNorm = 0.6156, lr_0 = 7.5915e-04
Loss = 8.7082e-02, PNorm = 131.8707, GNorm = 0.5663, lr_0 = 7.5863e-04
Loss = 9.7801e-02, PNorm = 131.9456, GNorm = 0.6037, lr_0 = 7.5811e-04
Loss = 7.2293e-02, PNorm = 132.0119, GNorm = 0.4849, lr_0 = 7.5759e-04
Loss = 8.5476e-02, PNorm = 132.0767, GNorm = 0.7223, lr_0 = 7.5707e-04
Loss = 8.6127e-02, PNorm = 132.1423, GNorm = 0.4706, lr_0 = 7.5655e-04
Loss = 7.7006e-02, PNorm = 132.2096, GNorm = 0.4303, lr_0 = 7.5603e-04
Loss = 9.4256e-02, PNorm = 132.2754, GNorm = 0.8894, lr_0 = 7.5552e-04
Loss = 8.2800e-02, PNorm = 132.3426, GNorm = 0.6102, lr_0 = 7.5500e-04
Loss = 7.9893e-02, PNorm = 132.4171, GNorm = 0.5974, lr_0 = 7.5448e-04
Loss = 7.9294e-02, PNorm = 132.4855, GNorm = 0.8487, lr_0 = 7.5397e-04
Loss = 9.5712e-02, PNorm = 132.5570, GNorm = 0.7624, lr_0 = 7.5345e-04
Loss = 8.6189e-02, PNorm = 132.6315, GNorm = 0.8739, lr_0 = 7.5293e-04
Loss = 8.5111e-02, PNorm = 132.7067, GNorm = 0.8537, lr_0 = 7.5242e-04
Loss = 9.3600e-02, PNorm = 132.7855, GNorm = 0.6162, lr_0 = 7.5190e-04
Loss = 8.3265e-02, PNorm = 132.8609, GNorm = 0.5781, lr_0 = 7.5139e-04
Loss = 8.1347e-02, PNorm = 132.9356, GNorm = 0.5107, lr_0 = 7.5087e-04
Loss = 9.2462e-02, PNorm = 133.0120, GNorm = 0.7037, lr_0 = 7.5036e-04
Loss = 8.2272e-02, PNorm = 133.0904, GNorm = 0.5739, lr_0 = 7.4984e-04
Loss = 9.0081e-02, PNorm = 133.1615, GNorm = 0.9003, lr_0 = 7.4933e-04
Loss = 7.5609e-02, PNorm = 133.2376, GNorm = 0.4804, lr_0 = 7.4882e-04
Loss = 1.0196e-01, PNorm = 133.3071, GNorm = 0.7218, lr_0 = 7.4830e-04
Loss = 8.6828e-02, PNorm = 133.3848, GNorm = 0.9446, lr_0 = 7.4779e-04
Loss = 7.5781e-02, PNorm = 133.4605, GNorm = 0.5934, lr_0 = 7.4728e-04
Loss = 9.0959e-02, PNorm = 133.5334, GNorm = 0.7811, lr_0 = 7.4677e-04
Loss = 9.0000e-02, PNorm = 133.6090, GNorm = 0.6892, lr_0 = 7.4625e-04
Loss = 9.8241e-02, PNorm = 133.6872, GNorm = 0.5334, lr_0 = 7.4574e-04
Loss = 1.0230e-01, PNorm = 133.7677, GNorm = 0.5580, lr_0 = 7.4523e-04
Loss = 9.3435e-02, PNorm = 133.8395, GNorm = 0.4837, lr_0 = 7.4472e-04
Loss = 9.0700e-02, PNorm = 133.9155, GNorm = 0.6674, lr_0 = 7.4421e-04
Loss = 8.4683e-02, PNorm = 133.9947, GNorm = 0.4914, lr_0 = 7.4370e-04
Loss = 8.2780e-02, PNorm = 134.0725, GNorm = 0.6264, lr_0 = 7.4319e-04
Loss = 8.2705e-02, PNorm = 134.1494, GNorm = 0.5692, lr_0 = 7.4268e-04
Loss = 7.9484e-02, PNorm = 134.2197, GNorm = 0.4636, lr_0 = 7.4217e-04
Loss = 8.4210e-02, PNorm = 134.2942, GNorm = 0.5809, lr_0 = 7.4167e-04
Loss = 9.8051e-02, PNorm = 134.3725, GNorm = 0.7662, lr_0 = 7.4116e-04
Loss = 1.0351e-01, PNorm = 134.4491, GNorm = 1.0025, lr_0 = 7.4065e-04
Loss = 1.0907e-01, PNorm = 134.5298, GNorm = 0.9483, lr_0 = 7.4014e-04
Loss = 9.8404e-02, PNorm = 134.6145, GNorm = 0.4886, lr_0 = 7.3964e-04
Loss = 8.4189e-02, PNorm = 134.6905, GNorm = 0.5367, lr_0 = 7.3913e-04
Loss = 8.4112e-02, PNorm = 134.7775, GNorm = 0.6453, lr_0 = 7.3862e-04
Loss = 1.0690e-01, PNorm = 134.8626, GNorm = 0.9994, lr_0 = 7.3812e-04
Loss = 7.9371e-02, PNorm = 134.9456, GNorm = 0.9296, lr_0 = 7.3761e-04
Loss = 9.8682e-02, PNorm = 135.0277, GNorm = 0.7185, lr_0 = 7.3711e-04
Loss = 9.8404e-02, PNorm = 135.1067, GNorm = 0.6116, lr_0 = 7.3660e-04
Loss = 1.0744e-01, PNorm = 135.1987, GNorm = 0.6162, lr_0 = 7.3610e-04
Loss = 9.8223e-02, PNorm = 135.2807, GNorm = 0.7370, lr_0 = 7.3559e-04
Loss = 9.8614e-02, PNorm = 135.3678, GNorm = 0.9223, lr_0 = 7.3509e-04
Loss = 9.5475e-02, PNorm = 135.4545, GNorm = 0.6673, lr_0 = 7.3458e-04
Loss = 1.0142e-01, PNorm = 135.5371, GNorm = 0.7232, lr_0 = 7.3408e-04
Loss = 9.4933e-02, PNorm = 135.6295, GNorm = 0.7730, lr_0 = 7.3358e-04
Loss = 9.4927e-02, PNorm = 135.7131, GNorm = 0.8370, lr_0 = 7.3308e-04
Loss = 9.6750e-02, PNorm = 135.7984, GNorm = 1.0543, lr_0 = 7.3257e-04
Loss = 9.3397e-02, PNorm = 135.8846, GNorm = 0.8241, lr_0 = 7.3207e-04
Loss = 9.3195e-02, PNorm = 135.9802, GNorm = 0.4611, lr_0 = 7.3157e-04
Loss = 1.0209e-01, PNorm = 136.0677, GNorm = 0.6480, lr_0 = 7.3107e-04
Loss = 8.3835e-02, PNorm = 136.1567, GNorm = 0.7971, lr_0 = 7.3057e-04
Loss = 1.0300e-01, PNorm = 136.2446, GNorm = 1.5491, lr_0 = 7.3007e-04
Loss = 8.7239e-02, PNorm = 136.3331, GNorm = 0.4379, lr_0 = 7.2957e-04
Loss = 9.1052e-02, PNorm = 136.4228, GNorm = 0.6666, lr_0 = 7.2907e-04
Loss = 1.1211e-01, PNorm = 136.5043, GNorm = 0.5999, lr_0 = 7.2857e-04
Loss = 8.9841e-02, PNorm = 136.5911, GNorm = 0.4728, lr_0 = 7.2807e-04
Loss = 9.5079e-02, PNorm = 136.6778, GNorm = 0.4045, lr_0 = 7.2757e-04
Loss = 1.0085e-01, PNorm = 136.7622, GNorm = 0.4789, lr_0 = 7.2707e-04
Loss = 9.0495e-02, PNorm = 136.8525, GNorm = 0.5834, lr_0 = 7.2657e-04
Loss = 1.1169e-01, PNorm = 136.9313, GNorm = 0.8056, lr_0 = 7.2608e-04
Loss = 1.0758e-01, PNorm = 137.0309, GNorm = 0.7084, lr_0 = 7.2558e-04
Loss = 9.8971e-02, PNorm = 137.1200, GNorm = 0.8418, lr_0 = 7.2508e-04
Loss = 1.0082e-01, PNorm = 137.2051, GNorm = 1.1468, lr_0 = 7.2458e-04
Loss = 9.9108e-02, PNorm = 137.2907, GNorm = 0.6426, lr_0 = 7.2409e-04
Loss = 8.4980e-02, PNorm = 137.3833, GNorm = 0.9007, lr_0 = 7.2359e-04
Loss = 1.0530e-01, PNorm = 137.4647, GNorm = 0.4842, lr_0 = 7.2310e-04
Loss = 1.2257e-01, PNorm = 137.5595, GNorm = 1.1711, lr_0 = 7.2260e-04
Loss = 9.2057e-02, PNorm = 137.6438, GNorm = 0.8599, lr_0 = 7.2211e-04
Loss = 1.0449e-01, PNorm = 137.7344, GNorm = 0.7049, lr_0 = 7.2161e-04
Loss = 9.0785e-02, PNorm = 137.8264, GNorm = 0.6368, lr_0 = 7.2112e-04
Loss = 1.0310e-01, PNorm = 137.9172, GNorm = 0.4687, lr_0 = 7.2062e-04
Loss = 8.4820e-02, PNorm = 138.0050, GNorm = 0.7606, lr_0 = 7.2013e-04
Loss = 1.0546e-01, PNorm = 138.0898, GNorm = 0.6635, lr_0 = 7.1964e-04
Validation mae = 0.124041
Epoch 6
Loss = 7.3352e-02, PNorm = 138.1709, GNorm = 0.4571, lr_0 = 7.1914e-04
Loss = 6.4317e-02, PNorm = 138.2401, GNorm = 0.4526, lr_0 = 7.1865e-04
Loss = 7.0396e-02, PNorm = 138.2982, GNorm = 0.4768, lr_0 = 7.1816e-04
Loss = 6.0258e-02, PNorm = 138.3495, GNorm = 0.5537, lr_0 = 7.1767e-04
Loss = 6.6530e-02, PNorm = 138.4016, GNorm = 0.4886, lr_0 = 7.1717e-04
Loss = 6.3255e-02, PNorm = 138.4603, GNorm = 0.4628, lr_0 = 7.1668e-04
Loss = 5.5361e-02, PNorm = 138.5079, GNorm = 0.6312, lr_0 = 7.1619e-04
Loss = 7.0024e-02, PNorm = 138.5537, GNorm = 0.6330, lr_0 = 7.1570e-04
Loss = 6.7649e-02, PNorm = 138.6030, GNorm = 0.5293, lr_0 = 7.1521e-04
Loss = 6.5936e-02, PNorm = 138.6431, GNorm = 0.9231, lr_0 = 7.1472e-04
Loss = 6.3941e-02, PNorm = 138.7026, GNorm = 0.4440, lr_0 = 7.1423e-04
Loss = 5.9944e-02, PNorm = 138.7532, GNorm = 0.3688, lr_0 = 7.1374e-04
Loss = 5.9811e-02, PNorm = 138.8042, GNorm = 0.4078, lr_0 = 7.1325e-04
Loss = 6.3565e-02, PNorm = 138.8545, GNorm = 0.3941, lr_0 = 7.1277e-04
Loss = 7.1209e-02, PNorm = 138.9056, GNorm = 0.8984, lr_0 = 7.1228e-04
Loss = 5.1708e-02, PNorm = 138.9644, GNorm = 0.6200, lr_0 = 7.1179e-04
Loss = 6.3937e-02, PNorm = 139.0184, GNorm = 0.5643, lr_0 = 7.1130e-04
Loss = 7.1323e-02, PNorm = 139.0704, GNorm = 1.0216, lr_0 = 7.1081e-04
Loss = 5.8915e-02, PNorm = 139.1246, GNorm = 0.7211, lr_0 = 7.1033e-04
Loss = 6.8119e-02, PNorm = 139.1744, GNorm = 0.4176, lr_0 = 7.0984e-04
Loss = 5.6388e-02, PNorm = 139.2280, GNorm = 0.4727, lr_0 = 7.0935e-04
Loss = 4.7640e-02, PNorm = 139.2748, GNorm = 0.4936, lr_0 = 7.0887e-04
Loss = 5.5442e-02, PNorm = 139.3203, GNorm = 0.7111, lr_0 = 7.0838e-04
Loss = 6.4321e-02, PNorm = 139.3733, GNorm = 0.6226, lr_0 = 7.0790e-04
Loss = 5.9075e-02, PNorm = 139.4247, GNorm = 0.4844, lr_0 = 7.0741e-04
Loss = 6.5422e-02, PNorm = 139.4796, GNorm = 0.5284, lr_0 = 7.0693e-04
Loss = 6.1044e-02, PNorm = 139.5262, GNorm = 0.3698, lr_0 = 7.0644e-04
Loss = 6.6768e-02, PNorm = 139.5810, GNorm = 0.4361, lr_0 = 7.0596e-04
Loss = 6.4570e-02, PNorm = 139.6403, GNorm = 0.5473, lr_0 = 7.0548e-04
Loss = 5.8664e-02, PNorm = 139.6917, GNorm = 0.3822, lr_0 = 7.0499e-04
Loss = 5.4680e-02, PNorm = 139.7429, GNorm = 0.2750, lr_0 = 7.0451e-04
Loss = 5.3087e-02, PNorm = 139.7977, GNorm = 0.6135, lr_0 = 7.0403e-04
Loss = 5.9688e-02, PNorm = 139.8475, GNorm = 1.5330, lr_0 = 7.0354e-04
Loss = 6.4343e-02, PNorm = 139.8958, GNorm = 0.5431, lr_0 = 7.0306e-04
Loss = 6.1797e-02, PNorm = 139.9374, GNorm = 0.5337, lr_0 = 7.0258e-04
Loss = 5.8573e-02, PNorm = 139.9912, GNorm = 1.2389, lr_0 = 7.0210e-04
Loss = 5.8472e-02, PNorm = 140.0405, GNorm = 0.3440, lr_0 = 7.0162e-04
Loss = 5.6213e-02, PNorm = 140.0933, GNorm = 0.3131, lr_0 = 7.0114e-04
Loss = 5.9403e-02, PNorm = 140.1479, GNorm = 0.4351, lr_0 = 7.0066e-04
Loss = 5.0234e-02, PNorm = 140.2003, GNorm = 0.4755, lr_0 = 7.0018e-04
Loss = 6.3034e-02, PNorm = 140.2497, GNorm = 0.2948, lr_0 = 6.9970e-04
Loss = 5.5365e-02, PNorm = 140.3023, GNorm = 0.5620, lr_0 = 6.9922e-04
Loss = 6.3694e-02, PNorm = 140.3558, GNorm = 0.6945, lr_0 = 6.9874e-04
Loss = 4.5577e-02, PNorm = 140.4114, GNorm = 0.3294, lr_0 = 6.9826e-04
Loss = 5.7351e-02, PNorm = 140.4580, GNorm = 0.3361, lr_0 = 6.9778e-04
Loss = 6.2316e-02, PNorm = 140.5073, GNorm = 0.3786, lr_0 = 6.9730e-04
Loss = 4.9490e-02, PNorm = 140.5560, GNorm = 0.4194, lr_0 = 6.9683e-04
Loss = 5.6647e-02, PNorm = 140.6089, GNorm = 0.5037, lr_0 = 6.9635e-04
Loss = 6.9195e-02, PNorm = 140.6585, GNorm = 0.9970, lr_0 = 6.9587e-04
Loss = 5.9240e-02, PNorm = 140.7200, GNorm = 0.4993, lr_0 = 6.9540e-04
Loss = 5.6253e-02, PNorm = 140.7825, GNorm = 0.5148, lr_0 = 6.9492e-04
Loss = 6.0675e-02, PNorm = 140.8356, GNorm = 0.4613, lr_0 = 6.9444e-04
Loss = 7.1068e-02, PNorm = 140.8896, GNorm = 0.7139, lr_0 = 6.9397e-04
Loss = 6.6817e-02, PNorm = 140.9561, GNorm = 0.4307, lr_0 = 6.9349e-04
Loss = 5.5012e-02, PNorm = 141.0168, GNorm = 0.7006, lr_0 = 6.9302e-04
Loss = 6.1554e-02, PNorm = 141.0742, GNorm = 0.4702, lr_0 = 6.9254e-04
Loss = 5.7295e-02, PNorm = 141.1429, GNorm = 0.5742, lr_0 = 6.9207e-04
Loss = 6.3064e-02, PNorm = 141.2044, GNorm = 0.5429, lr_0 = 6.9159e-04
Loss = 5.6869e-02, PNorm = 141.2615, GNorm = 0.3808, lr_0 = 6.9112e-04
Loss = 6.7371e-02, PNorm = 141.3291, GNorm = 0.3935, lr_0 = 6.9065e-04
Loss = 5.5754e-02, PNorm = 141.3961, GNorm = 0.6390, lr_0 = 6.9017e-04
Loss = 6.0847e-02, PNorm = 141.4618, GNorm = 0.7386, lr_0 = 6.8970e-04
Loss = 5.3957e-02, PNorm = 141.5185, GNorm = 0.3673, lr_0 = 6.8923e-04
Loss = 6.4280e-02, PNorm = 141.5760, GNorm = 0.8739, lr_0 = 6.8876e-04
Loss = 7.6235e-02, PNorm = 141.6270, GNorm = 0.5172, lr_0 = 6.8828e-04
Loss = 6.6476e-02, PNorm = 141.6947, GNorm = 0.4241, lr_0 = 6.8781e-04
Loss = 7.4073e-02, PNorm = 141.7639, GNorm = 0.3260, lr_0 = 6.8734e-04
Loss = 6.0233e-02, PNorm = 141.8276, GNorm = 0.4716, lr_0 = 6.8687e-04
Loss = 6.2900e-02, PNorm = 141.8928, GNorm = 0.9780, lr_0 = 6.8640e-04
Loss = 6.5162e-02, PNorm = 141.9567, GNorm = 0.6941, lr_0 = 6.8593e-04
Loss = 7.5437e-02, PNorm = 142.0204, GNorm = 0.7664, lr_0 = 6.8546e-04
Loss = 6.2744e-02, PNorm = 142.0892, GNorm = 0.4890, lr_0 = 6.8499e-04
Loss = 7.3641e-02, PNorm = 142.1595, GNorm = 0.8254, lr_0 = 6.8452e-04
Loss = 6.4206e-02, PNorm = 142.2263, GNorm = 0.5620, lr_0 = 6.8405e-04
Loss = 8.7358e-02, PNorm = 142.2864, GNorm = 1.0020, lr_0 = 6.8358e-04
Loss = 8.4466e-02, PNorm = 142.3591, GNorm = 0.6311, lr_0 = 6.8312e-04
Loss = 7.5779e-02, PNorm = 142.4305, GNorm = 0.5307, lr_0 = 6.8265e-04
Loss = 7.0620e-02, PNorm = 142.5024, GNorm = 0.7924, lr_0 = 6.8218e-04
Loss = 7.1805e-02, PNorm = 142.5731, GNorm = 0.5686, lr_0 = 6.8171e-04
Loss = 7.4229e-02, PNorm = 142.6380, GNorm = 0.5414, lr_0 = 6.8125e-04
Loss = 7.5101e-02, PNorm = 142.7164, GNorm = 0.8165, lr_0 = 6.8078e-04
Loss = 8.0460e-02, PNorm = 142.7819, GNorm = 0.5871, lr_0 = 6.8031e-04
Loss = 5.8914e-02, PNorm = 142.8483, GNorm = 0.7555, lr_0 = 6.7985e-04
Loss = 7.8063e-02, PNorm = 142.9147, GNorm = 0.4210, lr_0 = 6.7938e-04
Loss = 6.9296e-02, PNorm = 142.9840, GNorm = 0.6074, lr_0 = 6.7892e-04
Loss = 6.8767e-02, PNorm = 143.0520, GNorm = 0.5801, lr_0 = 6.7845e-04
Loss = 5.9772e-02, PNorm = 143.1223, GNorm = 0.3938, lr_0 = 6.7799e-04
Loss = 6.7653e-02, PNorm = 143.1902, GNorm = 0.5672, lr_0 = 6.7752e-04
Loss = 8.8097e-02, PNorm = 143.2608, GNorm = 0.6909, lr_0 = 6.7706e-04
Loss = 6.6197e-02, PNorm = 143.3282, GNorm = 0.4739, lr_0 = 6.7659e-04
Loss = 6.3257e-02, PNorm = 143.3981, GNorm = 0.4582, lr_0 = 6.7613e-04
Loss = 7.1016e-02, PNorm = 143.4574, GNorm = 0.6923, lr_0 = 6.7567e-04
Loss = 7.7580e-02, PNorm = 143.5274, GNorm = 0.4843, lr_0 = 6.7520e-04
Loss = 8.2973e-02, PNorm = 143.5977, GNorm = 0.4922, lr_0 = 6.7474e-04
Loss = 7.6618e-02, PNorm = 143.6765, GNorm = 0.6276, lr_0 = 6.7428e-04
Loss = 6.3657e-02, PNorm = 143.7505, GNorm = 0.6634, lr_0 = 6.7382e-04
Loss = 7.3984e-02, PNorm = 143.8204, GNorm = 0.7810, lr_0 = 6.7335e-04
Loss = 7.8146e-02, PNorm = 143.8981, GNorm = 0.9706, lr_0 = 6.7289e-04
Loss = 7.3681e-02, PNorm = 143.9753, GNorm = 0.8414, lr_0 = 6.7243e-04
Loss = 7.4204e-02, PNorm = 144.0434, GNorm = 0.7990, lr_0 = 6.7197e-04
Loss = 7.1929e-02, PNorm = 144.1205, GNorm = 0.6370, lr_0 = 6.7151e-04
Loss = 8.1549e-02, PNorm = 144.1893, GNorm = 0.5586, lr_0 = 6.7105e-04
Loss = 7.0699e-02, PNorm = 144.2623, GNorm = 0.5663, lr_0 = 6.7059e-04
Loss = 7.0347e-02, PNorm = 144.3411, GNorm = 0.8828, lr_0 = 6.7013e-04
Loss = 8.8995e-02, PNorm = 144.4167, GNorm = 0.9555, lr_0 = 6.6967e-04
Loss = 8.0864e-02, PNorm = 144.5041, GNorm = 0.8951, lr_0 = 6.6921e-04
Loss = 7.5136e-02, PNorm = 144.5838, GNorm = 0.5706, lr_0 = 6.6876e-04
Loss = 6.7820e-02, PNorm = 144.6568, GNorm = 0.6273, lr_0 = 6.6830e-04
Loss = 7.4103e-02, PNorm = 144.7303, GNorm = 0.8412, lr_0 = 6.6784e-04
Loss = 6.6913e-02, PNorm = 144.8034, GNorm = 0.4884, lr_0 = 6.6738e-04
Loss = 6.7621e-02, PNorm = 144.8731, GNorm = 0.8832, lr_0 = 6.6693e-04
Loss = 6.0479e-02, PNorm = 144.9318, GNorm = 0.4571, lr_0 = 6.6647e-04
Loss = 7.8285e-02, PNorm = 144.9995, GNorm = 0.6388, lr_0 = 6.6601e-04
Loss = 8.0647e-02, PNorm = 145.0668, GNorm = 0.4655, lr_0 = 6.6556e-04
Loss = 6.0186e-02, PNorm = 145.1392, GNorm = 0.4710, lr_0 = 6.6510e-04
Loss = 7.4077e-02, PNorm = 145.2066, GNorm = 0.6830, lr_0 = 6.6464e-04
Loss = 7.0093e-02, PNorm = 145.2753, GNorm = 0.6087, lr_0 = 6.6419e-04
Loss = 7.4810e-02, PNorm = 145.3549, GNorm = 0.5691, lr_0 = 6.6373e-04
Loss = 9.3906e-02, PNorm = 145.4293, GNorm = 0.4878, lr_0 = 6.6328e-04
Loss = 8.8259e-02, PNorm = 145.5045, GNorm = 0.4695, lr_0 = 6.6282e-04
Validation mae = 0.124531
Epoch 7
Loss = 6.0146e-02, PNorm = 145.5655, GNorm = 0.5822, lr_0 = 6.6237e-04
Loss = 4.7098e-02, PNorm = 145.6194, GNorm = 0.4358, lr_0 = 6.6192e-04
Loss = 4.4544e-02, PNorm = 145.6672, GNorm = 0.4006, lr_0 = 6.6146e-04
Loss = 4.6441e-02, PNorm = 145.7078, GNorm = 0.4185, lr_0 = 6.6101e-04
Loss = 4.9408e-02, PNorm = 145.7503, GNorm = 0.5576, lr_0 = 6.6056e-04
Loss = 5.6980e-02, PNorm = 145.7902, GNorm = 0.4100, lr_0 = 6.6011e-04
Loss = 5.6016e-02, PNorm = 145.8353, GNorm = 0.9232, lr_0 = 6.5965e-04
Loss = 4.9350e-02, PNorm = 145.8948, GNorm = 0.4895, lr_0 = 6.5920e-04
Loss = 4.3750e-02, PNorm = 145.9402, GNorm = 0.4376, lr_0 = 6.5875e-04
Loss = 4.5598e-02, PNorm = 145.9846, GNorm = 0.3687, lr_0 = 6.5830e-04
Loss = 4.0779e-02, PNorm = 146.0265, GNorm = 0.3890, lr_0 = 6.5785e-04
Loss = 4.3829e-02, PNorm = 146.0697, GNorm = 0.5043, lr_0 = 6.5740e-04
Loss = 5.0340e-02, PNorm = 146.1096, GNorm = 0.3699, lr_0 = 6.5695e-04
Loss = 5.5905e-02, PNorm = 146.1521, GNorm = 0.3628, lr_0 = 6.5650e-04
Loss = 6.6796e-02, PNorm = 146.2052, GNorm = 0.5032, lr_0 = 6.5605e-04
Loss = 4.5152e-02, PNorm = 146.2580, GNorm = 0.3491, lr_0 = 6.5560e-04
Loss = 4.8693e-02, PNorm = 146.3076, GNorm = 0.3616, lr_0 = 6.5515e-04
Loss = 4.8069e-02, PNorm = 146.3561, GNorm = 0.3308, lr_0 = 6.5470e-04
Loss = 4.8442e-02, PNorm = 146.4017, GNorm = 0.7688, lr_0 = 6.5425e-04
Loss = 4.3530e-02, PNorm = 146.4547, GNorm = 0.3544, lr_0 = 6.5380e-04
Loss = 4.9533e-02, PNorm = 146.4964, GNorm = 0.5034, lr_0 = 6.5335e-04
Loss = 5.0513e-02, PNorm = 146.5445, GNorm = 0.5893, lr_0 = 6.5291e-04
Loss = 4.5087e-02, PNorm = 146.5946, GNorm = 0.7447, lr_0 = 6.5246e-04
Loss = 4.4259e-02, PNorm = 146.6429, GNorm = 0.4065, lr_0 = 6.5201e-04
Loss = 5.0541e-02, PNorm = 146.6902, GNorm = 0.6661, lr_0 = 6.5157e-04
Loss = 4.9094e-02, PNorm = 146.7415, GNorm = 0.5923, lr_0 = 6.5112e-04
Loss = 4.4110e-02, PNorm = 146.7888, GNorm = 0.5626, lr_0 = 6.5067e-04
Loss = 4.3288e-02, PNorm = 146.8287, GNorm = 0.7673, lr_0 = 6.5023e-04
Loss = 4.4722e-02, PNorm = 146.8702, GNorm = 0.4201, lr_0 = 6.4978e-04
Loss = 5.4645e-02, PNorm = 146.9108, GNorm = 0.8085, lr_0 = 6.4934e-04
Loss = 4.2771e-02, PNorm = 146.9511, GNorm = 0.4442, lr_0 = 6.4889e-04
Loss = 4.4398e-02, PNorm = 146.9927, GNorm = 0.5245, lr_0 = 6.4845e-04
Loss = 4.9259e-02, PNorm = 147.0377, GNorm = 1.1593, lr_0 = 6.4800e-04
Loss = 5.1466e-02, PNorm = 147.0764, GNorm = 0.6852, lr_0 = 6.4756e-04
Loss = 4.3771e-02, PNorm = 147.1222, GNorm = 0.4794, lr_0 = 6.4712e-04
Loss = 4.8444e-02, PNorm = 147.1639, GNorm = 0.8230, lr_0 = 6.4667e-04
Loss = 4.5760e-02, PNorm = 147.2077, GNorm = 0.3890, lr_0 = 6.4623e-04
Loss = 4.8962e-02, PNorm = 147.2537, GNorm = 0.3372, lr_0 = 6.4579e-04
Loss = 4.3137e-02, PNorm = 147.2964, GNorm = 0.3290, lr_0 = 6.4534e-04
Loss = 4.2725e-02, PNorm = 147.3417, GNorm = 0.3687, lr_0 = 6.4490e-04
Loss = 4.0236e-02, PNorm = 147.3862, GNorm = 0.3040, lr_0 = 6.4446e-04
Loss = 5.2433e-02, PNorm = 147.4359, GNorm = 0.8120, lr_0 = 6.4402e-04
Loss = 4.4375e-02, PNorm = 147.4848, GNorm = 0.4113, lr_0 = 6.4358e-04
Loss = 5.6906e-02, PNorm = 147.5299, GNorm = 0.4847, lr_0 = 6.4314e-04
Loss = 5.4577e-02, PNorm = 147.5776, GNorm = 0.4676, lr_0 = 6.4270e-04
Loss = 4.8696e-02, PNorm = 147.6314, GNorm = 0.4470, lr_0 = 6.4226e-04
Loss = 4.3427e-02, PNorm = 147.6778, GNorm = 0.3773, lr_0 = 6.4182e-04
Loss = 4.8199e-02, PNorm = 147.7190, GNorm = 0.4627, lr_0 = 6.4138e-04
Loss = 4.9346e-02, PNorm = 147.7645, GNorm = 0.3162, lr_0 = 6.4094e-04
Loss = 4.8949e-02, PNorm = 147.8123, GNorm = 0.6809, lr_0 = 6.4050e-04
Loss = 5.2697e-02, PNorm = 147.8701, GNorm = 0.7544, lr_0 = 6.4006e-04
Loss = 4.5889e-02, PNorm = 147.9223, GNorm = 0.4262, lr_0 = 6.3962e-04
Loss = 4.7774e-02, PNorm = 147.9706, GNorm = 0.4188, lr_0 = 6.3918e-04
Loss = 4.4500e-02, PNorm = 148.0208, GNorm = 0.2612, lr_0 = 6.3874e-04
Loss = 5.0955e-02, PNorm = 148.0752, GNorm = 0.5759, lr_0 = 6.3831e-04
Loss = 4.6545e-02, PNorm = 148.1260, GNorm = 0.5022, lr_0 = 6.3787e-04
Loss = 5.8966e-02, PNorm = 148.1790, GNorm = 0.3876, lr_0 = 6.3743e-04
Loss = 5.5055e-02, PNorm = 148.2262, GNorm = 0.4617, lr_0 = 6.3700e-04
Loss = 4.5644e-02, PNorm = 148.2810, GNorm = 0.4835, lr_0 = 6.3656e-04
Loss = 4.2341e-02, PNorm = 148.3278, GNorm = 0.5120, lr_0 = 6.3612e-04
Loss = 5.0587e-02, PNorm = 148.3715, GNorm = 0.8201, lr_0 = 6.3569e-04
Loss = 5.0920e-02, PNorm = 148.4249, GNorm = 0.4787, lr_0 = 6.3525e-04
Loss = 5.2534e-02, PNorm = 148.4794, GNorm = 0.4241, lr_0 = 6.3482e-04
Loss = 4.8825e-02, PNorm = 148.5389, GNorm = 0.6602, lr_0 = 6.3438e-04
Loss = 4.2106e-02, PNorm = 148.5846, GNorm = 0.7000, lr_0 = 6.3395e-04
Loss = 4.6881e-02, PNorm = 148.6358, GNorm = 0.6603, lr_0 = 6.3351e-04
Loss = 4.2027e-02, PNorm = 148.6871, GNorm = 0.6189, lr_0 = 6.3308e-04
Loss = 5.8178e-02, PNorm = 148.7395, GNorm = 1.1281, lr_0 = 6.3265e-04
Loss = 4.1504e-02, PNorm = 148.7847, GNorm = 0.4853, lr_0 = 6.3221e-04
Loss = 5.4207e-02, PNorm = 148.8380, GNorm = 0.5548, lr_0 = 6.3178e-04
Loss = 4.4556e-02, PNorm = 148.8904, GNorm = 0.4001, lr_0 = 6.3135e-04
Loss = 5.2780e-02, PNorm = 148.9485, GNorm = 0.3764, lr_0 = 6.3091e-04
Loss = 4.8596e-02, PNorm = 149.0009, GNorm = 0.3143, lr_0 = 6.3048e-04
Loss = 4.5276e-02, PNorm = 149.0533, GNorm = 0.5898, lr_0 = 6.3005e-04
Loss = 5.1329e-02, PNorm = 149.1051, GNorm = 0.4863, lr_0 = 6.2962e-04
Loss = 4.3379e-02, PNorm = 149.1591, GNorm = 0.3250, lr_0 = 6.2919e-04
Loss = 5.0201e-02, PNorm = 149.2164, GNorm = 0.3729, lr_0 = 6.2876e-04
Loss = 4.2185e-02, PNorm = 149.2747, GNorm = 0.4443, lr_0 = 6.2833e-04
Loss = 5.3236e-02, PNorm = 149.3211, GNorm = 0.5046, lr_0 = 6.2789e-04
Loss = 4.8574e-02, PNorm = 149.3761, GNorm = 0.4242, lr_0 = 6.2746e-04
Loss = 5.5601e-02, PNorm = 149.4301, GNorm = 0.3068, lr_0 = 6.2703e-04
Loss = 4.1706e-02, PNorm = 149.4795, GNorm = 0.3147, lr_0 = 6.2661e-04
Loss = 5.3321e-02, PNorm = 149.5315, GNorm = 0.7110, lr_0 = 6.2618e-04
Loss = 5.1651e-02, PNorm = 149.5845, GNorm = 0.6021, lr_0 = 6.2575e-04
Loss = 4.3389e-02, PNorm = 149.6404, GNorm = 0.4944, lr_0 = 6.2532e-04
Loss = 7.3254e-02, PNorm = 149.6913, GNorm = 0.5809, lr_0 = 6.2489e-04
Loss = 5.0256e-02, PNorm = 149.7483, GNorm = 0.4755, lr_0 = 6.2446e-04
Loss = 6.2176e-02, PNorm = 149.8037, GNorm = 0.6895, lr_0 = 6.2403e-04
Loss = 5.8121e-02, PNorm = 149.8713, GNorm = 0.3237, lr_0 = 6.2361e-04
Loss = 4.7298e-02, PNorm = 149.9300, GNorm = 0.3857, lr_0 = 6.2318e-04
Loss = 5.0288e-02, PNorm = 149.9800, GNorm = 0.4393, lr_0 = 6.2275e-04
Loss = 6.1344e-02, PNorm = 150.0381, GNorm = 0.8691, lr_0 = 6.2233e-04
Loss = 4.7784e-02, PNorm = 150.1004, GNorm = 0.3131, lr_0 = 6.2190e-04
Loss = 5.5304e-02, PNorm = 150.1590, GNorm = 0.5058, lr_0 = 6.2147e-04
Loss = 5.5071e-02, PNorm = 150.2212, GNorm = 0.2627, lr_0 = 6.2105e-04
Loss = 4.2727e-02, PNorm = 150.2828, GNorm = 0.3493, lr_0 = 6.2062e-04
Loss = 6.6230e-02, PNorm = 150.3361, GNorm = 0.4347, lr_0 = 6.2020e-04
Loss = 9.0208e-02, PNorm = 150.4006, GNorm = 1.1493, lr_0 = 6.1977e-04
Loss = 5.3206e-02, PNorm = 150.4643, GNorm = 0.8556, lr_0 = 6.1935e-04
Loss = 5.3188e-02, PNorm = 150.5309, GNorm = 0.3766, lr_0 = 6.1892e-04
Loss = 5.0862e-02, PNorm = 150.5894, GNorm = 0.4777, lr_0 = 6.1850e-04
Loss = 6.0675e-02, PNorm = 150.6522, GNorm = 0.3768, lr_0 = 6.1808e-04
Loss = 4.8823e-02, PNorm = 150.7080, GNorm = 0.5688, lr_0 = 6.1765e-04
Loss = 6.0145e-02, PNorm = 150.7692, GNorm = 0.7291, lr_0 = 6.1723e-04
Loss = 4.7158e-02, PNorm = 150.8303, GNorm = 0.7532, lr_0 = 6.1681e-04
Loss = 4.5980e-02, PNorm = 150.8831, GNorm = 0.5294, lr_0 = 6.1638e-04
Loss = 4.4426e-02, PNorm = 150.9404, GNorm = 0.4413, lr_0 = 6.1596e-04
Loss = 4.4652e-02, PNorm = 150.9964, GNorm = 0.4517, lr_0 = 6.1554e-04
Loss = 6.0286e-02, PNorm = 151.0557, GNorm = 1.1076, lr_0 = 6.1512e-04
Loss = 5.4892e-02, PNorm = 151.1111, GNorm = 0.5274, lr_0 = 6.1470e-04
Loss = 6.7492e-02, PNorm = 151.1753, GNorm = 0.4379, lr_0 = 6.1428e-04
Loss = 5.2835e-02, PNorm = 151.2385, GNorm = 0.4530, lr_0 = 6.1385e-04
Loss = 5.5792e-02, PNorm = 151.3005, GNorm = 0.4361, lr_0 = 6.1343e-04
Loss = 5.5123e-02, PNorm = 151.3636, GNorm = 0.5973, lr_0 = 6.1301e-04
Loss = 6.8129e-02, PNorm = 151.4239, GNorm = 0.4465, lr_0 = 6.1259e-04
Loss = 4.7761e-02, PNorm = 151.4890, GNorm = 0.3617, lr_0 = 6.1217e-04
Loss = 5.6227e-02, PNorm = 151.5544, GNorm = 0.4787, lr_0 = 6.1175e-04
Loss = 5.6003e-02, PNorm = 151.6155, GNorm = 0.7004, lr_0 = 6.1134e-04
Loss = 4.5530e-02, PNorm = 151.6766, GNorm = 0.4310, lr_0 = 6.1092e-04
Loss = 5.5835e-02, PNorm = 151.7362, GNorm = 0.4288, lr_0 = 6.1050e-04
Validation mae = 0.123639
Epoch 8
Loss = 5.4332e-02, PNorm = 151.7898, GNorm = 0.5478, lr_0 = 6.1008e-04
Loss = 4.2582e-02, PNorm = 151.8361, GNorm = 0.3211, lr_0 = 6.0966e-04
Loss = 4.8528e-02, PNorm = 151.8701, GNorm = 0.5465, lr_0 = 6.0924e-04
Loss = 4.2262e-02, PNorm = 151.9112, GNorm = 0.6660, lr_0 = 6.0883e-04
Loss = 4.7246e-02, PNorm = 151.9516, GNorm = 0.9439, lr_0 = 6.0841e-04
Loss = 3.9741e-02, PNorm = 151.9933, GNorm = 0.3410, lr_0 = 6.0799e-04
Loss = 3.9774e-02, PNorm = 152.0324, GNorm = 0.6024, lr_0 = 6.0758e-04
Loss = 4.0635e-02, PNorm = 152.0710, GNorm = 0.4058, lr_0 = 6.0716e-04
Loss = 3.9911e-02, PNorm = 152.1090, GNorm = 0.5581, lr_0 = 6.0674e-04
Loss = 4.8768e-02, PNorm = 152.1482, GNorm = 0.3778, lr_0 = 6.0633e-04
Loss = 4.0091e-02, PNorm = 152.1868, GNorm = 0.4707, lr_0 = 6.0591e-04
Loss = 3.7680e-02, PNorm = 152.2303, GNorm = 0.3163, lr_0 = 6.0550e-04
Loss = 3.5654e-02, PNorm = 152.2710, GNorm = 0.3133, lr_0 = 6.0508e-04
Loss = 3.9046e-02, PNorm = 152.3162, GNorm = 0.4347, lr_0 = 6.0467e-04
Loss = 4.2343e-02, PNorm = 152.3554, GNorm = 0.2936, lr_0 = 6.0425e-04
Loss = 3.7575e-02, PNorm = 152.3942, GNorm = 0.3994, lr_0 = 6.0384e-04
Loss = 4.2887e-02, PNorm = 152.4312, GNorm = 0.3902, lr_0 = 6.0343e-04
Loss = 3.2587e-02, PNorm = 152.4651, GNorm = 0.5411, lr_0 = 6.0301e-04
Loss = 3.1234e-02, PNorm = 152.4981, GNorm = 0.3181, lr_0 = 6.0260e-04
Loss = 3.7691e-02, PNorm = 152.5301, GNorm = 0.3312, lr_0 = 6.0219e-04
Loss = 3.6017e-02, PNorm = 152.5679, GNorm = 0.3604, lr_0 = 6.0178e-04
Loss = 3.2850e-02, PNorm = 152.6088, GNorm = 0.5751, lr_0 = 6.0136e-04
Loss = 3.9101e-02, PNorm = 152.6430, GNorm = 0.5378, lr_0 = 6.0095e-04
Loss = 3.4008e-02, PNorm = 152.6859, GNorm = 0.4707, lr_0 = 6.0054e-04
Loss = 3.6635e-02, PNorm = 152.7279, GNorm = 0.2565, lr_0 = 6.0013e-04
Loss = 3.2104e-02, PNorm = 152.7654, GNorm = 0.6350, lr_0 = 5.9972e-04
Loss = 4.3302e-02, PNorm = 152.8031, GNorm = 0.4815, lr_0 = 5.9931e-04
Loss = 3.7466e-02, PNorm = 152.8398, GNorm = 0.4876, lr_0 = 5.9890e-04
Loss = 4.8357e-02, PNorm = 152.8838, GNorm = 0.4364, lr_0 = 5.9849e-04
Loss = 4.3234e-02, PNorm = 152.9203, GNorm = 0.6743, lr_0 = 5.9808e-04
Loss = 3.4501e-02, PNorm = 152.9600, GNorm = 0.4584, lr_0 = 5.9767e-04
Loss = 4.0450e-02, PNorm = 153.0025, GNorm = 0.3486, lr_0 = 5.9726e-04
Loss = 4.3498e-02, PNorm = 153.0372, GNorm = 0.4325, lr_0 = 5.9685e-04
Loss = 4.4070e-02, PNorm = 153.0774, GNorm = 0.2897, lr_0 = 5.9644e-04
Loss = 4.4025e-02, PNorm = 153.1155, GNorm = 0.3509, lr_0 = 5.9603e-04
Loss = 3.6734e-02, PNorm = 153.1553, GNorm = 0.4042, lr_0 = 5.9562e-04
Loss = 4.6541e-02, PNorm = 153.1928, GNorm = 0.9777, lr_0 = 5.9521e-04
Loss = 4.2499e-02, PNorm = 153.2377, GNorm = 0.3877, lr_0 = 5.9481e-04
Loss = 3.7900e-02, PNorm = 153.2792, GNorm = 0.6529, lr_0 = 5.9440e-04
Loss = 3.6256e-02, PNorm = 153.3205, GNorm = 0.3225, lr_0 = 5.9399e-04
Loss = 2.8576e-02, PNorm = 153.3590, GNorm = 0.3078, lr_0 = 5.9358e-04
Loss = 3.3912e-02, PNorm = 153.3991, GNorm = 0.5814, lr_0 = 5.9318e-04
Loss = 4.1237e-02, PNorm = 153.4364, GNorm = 0.8056, lr_0 = 5.9277e-04
Loss = 4.6905e-02, PNorm = 153.4686, GNorm = 0.3904, lr_0 = 5.9236e-04
Loss = 4.1994e-02, PNorm = 153.5154, GNorm = 0.4301, lr_0 = 5.9196e-04
Loss = 3.7721e-02, PNorm = 153.5605, GNorm = 0.4190, lr_0 = 5.9155e-04
Loss = 4.3901e-02, PNorm = 153.6018, GNorm = 0.4202, lr_0 = 5.9115e-04
Loss = 4.1292e-02, PNorm = 153.6486, GNorm = 0.3549, lr_0 = 5.9074e-04
Loss = 3.7626e-02, PNorm = 153.6954, GNorm = 0.4727, lr_0 = 5.9034e-04
Loss = 3.7588e-02, PNorm = 153.7362, GNorm = 0.7813, lr_0 = 5.8993e-04
Loss = 3.5970e-02, PNorm = 153.7835, GNorm = 0.5525, lr_0 = 5.8953e-04
Loss = 4.0176e-02, PNorm = 153.8235, GNorm = 0.5890, lr_0 = 5.8913e-04
Loss = 3.5582e-02, PNorm = 153.8662, GNorm = 0.7254, lr_0 = 5.8872e-04
Loss = 3.4008e-02, PNorm = 153.9074, GNorm = 0.3676, lr_0 = 5.8832e-04
Loss = 3.6892e-02, PNorm = 153.9492, GNorm = 0.3620, lr_0 = 5.8792e-04
Loss = 5.4156e-02, PNorm = 153.9890, GNorm = 0.7030, lr_0 = 5.8751e-04
Loss = 4.4250e-02, PNorm = 154.0339, GNorm = 0.2931, lr_0 = 5.8711e-04
Loss = 4.3376e-02, PNorm = 154.0833, GNorm = 0.6274, lr_0 = 5.8671e-04
Loss = 4.0767e-02, PNorm = 154.1291, GNorm = 0.4341, lr_0 = 5.8631e-04
Loss = 4.7193e-02, PNorm = 154.1784, GNorm = 0.8572, lr_0 = 5.8591e-04
Loss = 4.4895e-02, PNorm = 154.2193, GNorm = 0.6078, lr_0 = 5.8550e-04
Loss = 3.8283e-02, PNorm = 154.2634, GNorm = 0.3834, lr_0 = 5.8510e-04
Loss = 3.9719e-02, PNorm = 154.3031, GNorm = 0.4128, lr_0 = 5.8470e-04
Loss = 3.3725e-02, PNorm = 154.3415, GNorm = 0.4190, lr_0 = 5.8430e-04
Loss = 3.8523e-02, PNorm = 154.3871, GNorm = 0.4660, lr_0 = 5.8390e-04
Loss = 3.2302e-02, PNorm = 154.4307, GNorm = 0.3448, lr_0 = 5.8350e-04
Loss = 3.6793e-02, PNorm = 154.4718, GNorm = 0.3974, lr_0 = 5.8310e-04
Loss = 4.0440e-02, PNorm = 154.5199, GNorm = 0.3407, lr_0 = 5.8270e-04
Loss = 3.8488e-02, PNorm = 154.5641, GNorm = 0.3479, lr_0 = 5.8230e-04
Loss = 3.4559e-02, PNorm = 154.6115, GNorm = 0.5898, lr_0 = 5.8190e-04
Loss = 4.5537e-02, PNorm = 154.6512, GNorm = 0.4065, lr_0 = 5.8151e-04
Loss = 4.3775e-02, PNorm = 154.6973, GNorm = 0.5846, lr_0 = 5.8111e-04
Loss = 4.8352e-02, PNorm = 154.7446, GNorm = 0.3307, lr_0 = 5.8071e-04
Loss = 5.0274e-02, PNorm = 154.7875, GNorm = 0.5721, lr_0 = 5.8031e-04
Loss = 3.9594e-02, PNorm = 154.8341, GNorm = 0.6363, lr_0 = 5.7991e-04
Loss = 4.5707e-02, PNorm = 154.8743, GNorm = 0.5371, lr_0 = 5.7952e-04
Loss = 3.6610e-02, PNorm = 154.9220, GNorm = 0.4897, lr_0 = 5.7912e-04
Loss = 4.0592e-02, PNorm = 154.9620, GNorm = 0.4955, lr_0 = 5.7872e-04
Loss = 5.6169e-02, PNorm = 155.0118, GNorm = 0.5280, lr_0 = 5.7833e-04
Loss = 4.1415e-02, PNorm = 155.0638, GNorm = 0.3525, lr_0 = 5.7793e-04
Loss = 3.8389e-02, PNorm = 155.1190, GNorm = 0.3502, lr_0 = 5.7753e-04
Loss = 3.3605e-02, PNorm = 155.1692, GNorm = 0.2344, lr_0 = 5.7714e-04
Loss = 3.6622e-02, PNorm = 155.2192, GNorm = 0.3528, lr_0 = 5.7674e-04
Loss = 4.3313e-02, PNorm = 155.2612, GNorm = 0.8255, lr_0 = 5.7635e-04
Loss = 3.4571e-02, PNorm = 155.3113, GNorm = 0.3233, lr_0 = 5.7595e-04
Loss = 3.1319e-02, PNorm = 155.3566, GNorm = 0.6725, lr_0 = 5.7556e-04
Loss = 3.6202e-02, PNorm = 155.4044, GNorm = 0.2787, lr_0 = 5.7516e-04
Loss = 4.5586e-02, PNorm = 155.4552, GNorm = 0.7567, lr_0 = 5.7477e-04
Loss = 3.7810e-02, PNorm = 155.5009, GNorm = 0.6745, lr_0 = 5.7438e-04
Loss = 3.9067e-02, PNorm = 155.5485, GNorm = 0.2898, lr_0 = 5.7398e-04
Loss = 3.9717e-02, PNorm = 155.5955, GNorm = 0.4216, lr_0 = 5.7359e-04
Loss = 4.2125e-02, PNorm = 155.6371, GNorm = 0.2469, lr_0 = 5.7320e-04
Loss = 4.8163e-02, PNorm = 155.6825, GNorm = 0.8464, lr_0 = 5.7280e-04
Loss = 3.7772e-02, PNorm = 155.7285, GNorm = 0.6487, lr_0 = 5.7241e-04
Loss = 5.3399e-02, PNorm = 155.7779, GNorm = 0.5677, lr_0 = 5.7202e-04
Loss = 4.1175e-02, PNorm = 155.8338, GNorm = 0.5682, lr_0 = 5.7163e-04
Loss = 3.8720e-02, PNorm = 155.8869, GNorm = 0.6245, lr_0 = 5.7124e-04
Loss = 3.0092e-02, PNorm = 155.9359, GNorm = 0.5035, lr_0 = 5.7084e-04
Loss = 4.7839e-02, PNorm = 155.9811, GNorm = 0.5033, lr_0 = 5.7045e-04
Loss = 4.6404e-02, PNorm = 156.0306, GNorm = 0.4039, lr_0 = 5.7006e-04
Loss = 4.1139e-02, PNorm = 156.0796, GNorm = 0.3870, lr_0 = 5.6967e-04
Loss = 4.8473e-02, PNorm = 156.1274, GNorm = 0.5793, lr_0 = 5.6928e-04
Loss = 4.3072e-02, PNorm = 156.1800, GNorm = 0.3672, lr_0 = 5.6889e-04
Loss = 4.0035e-02, PNorm = 156.2297, GNorm = 0.7273, lr_0 = 5.6850e-04
Loss = 5.0235e-02, PNorm = 156.2808, GNorm = 0.5881, lr_0 = 5.6811e-04
Loss = 4.0957e-02, PNorm = 156.3305, GNorm = 0.5113, lr_0 = 5.6772e-04
Loss = 5.1904e-02, PNorm = 156.3873, GNorm = 0.4352, lr_0 = 5.6733e-04
Loss = 4.7608e-02, PNorm = 156.4395, GNorm = 0.4617, lr_0 = 5.6695e-04
Loss = 4.0233e-02, PNorm = 156.4949, GNorm = 0.3792, lr_0 = 5.6656e-04
Loss = 4.2987e-02, PNorm = 156.5492, GNorm = 0.5644, lr_0 = 5.6617e-04
Loss = 4.8006e-02, PNorm = 156.6034, GNorm = 0.8089, lr_0 = 5.6578e-04
Loss = 4.3922e-02, PNorm = 156.6609, GNorm = 0.5247, lr_0 = 5.6539e-04
Loss = 3.9025e-02, PNorm = 156.7172, GNorm = 0.5122, lr_0 = 5.6501e-04
Loss = 3.4192e-02, PNorm = 156.7654, GNorm = 0.4899, lr_0 = 5.6462e-04
Loss = 3.3075e-02, PNorm = 156.8169, GNorm = 0.3766, lr_0 = 5.6423e-04
Loss = 3.4078e-02, PNorm = 156.8611, GNorm = 0.5552, lr_0 = 5.6385e-04
Loss = 4.1342e-02, PNorm = 156.9039, GNorm = 0.2766, lr_0 = 5.6346e-04
Loss = 4.0467e-02, PNorm = 156.9455, GNorm = 0.4192, lr_0 = 5.6307e-04
Loss = 4.4267e-02, PNorm = 156.9913, GNorm = 0.4131, lr_0 = 5.6269e-04
Loss = 5.7681e-02, PNorm = 157.0414, GNorm = 0.7082, lr_0 = 5.6230e-04
Validation mae = 0.123548
Epoch 9
Loss = 3.6570e-02, PNorm = 157.0831, GNorm = 0.5231, lr_0 = 5.6192e-04
Loss = 3.1790e-02, PNorm = 157.1217, GNorm = 0.3030, lr_0 = 5.6153e-04
Loss = 3.9616e-02, PNorm = 157.1561, GNorm = 0.7446, lr_0 = 5.6115e-04
Loss = 4.0024e-02, PNorm = 157.1891, GNorm = 0.5444, lr_0 = 5.6076e-04
Loss = 3.4859e-02, PNorm = 157.2232, GNorm = 0.3620, lr_0 = 5.6038e-04
Loss = 3.1786e-02, PNorm = 157.2588, GNorm = 0.4732, lr_0 = 5.6000e-04
Loss = 2.9116e-02, PNorm = 157.2878, GNorm = 0.4360, lr_0 = 5.5961e-04
Loss = 3.4897e-02, PNorm = 157.3240, GNorm = 0.6128, lr_0 = 5.5923e-04
Loss = 3.5388e-02, PNorm = 157.3542, GNorm = 0.2625, lr_0 = 5.5885e-04
Loss = 2.7658e-02, PNorm = 157.3888, GNorm = 0.4997, lr_0 = 5.5846e-04
Loss = 3.1402e-02, PNorm = 157.4165, GNorm = 0.3110, lr_0 = 5.5808e-04
Loss = 2.9908e-02, PNorm = 157.4478, GNorm = 0.6154, lr_0 = 5.5770e-04
Loss = 4.1151e-02, PNorm = 157.4831, GNorm = 0.8934, lr_0 = 5.5732e-04
Loss = 4.0973e-02, PNorm = 157.5204, GNorm = 0.8339, lr_0 = 5.5693e-04
Loss = 3.7569e-02, PNorm = 157.5590, GNorm = 0.6018, lr_0 = 5.5655e-04
Loss = 3.5713e-02, PNorm = 157.5915, GNorm = 0.6002, lr_0 = 5.5617e-04
Loss = 2.9428e-02, PNorm = 157.6271, GNorm = 0.4300, lr_0 = 5.5579e-04
Loss = 3.2984e-02, PNorm = 157.6680, GNorm = 0.6029, lr_0 = 5.5541e-04
Loss = 3.9343e-02, PNorm = 157.7023, GNorm = 0.7743, lr_0 = 5.5503e-04
Loss = 2.9367e-02, PNorm = 157.7443, GNorm = 0.2742, lr_0 = 5.5465e-04
Loss = 3.1082e-02, PNorm = 157.7805, GNorm = 0.5512, lr_0 = 5.5427e-04
Loss = 3.1128e-02, PNorm = 157.8172, GNorm = 0.2430, lr_0 = 5.5389e-04
Loss = 3.1887e-02, PNorm = 157.8526, GNorm = 0.5369, lr_0 = 5.5351e-04
Loss = 2.9184e-02, PNorm = 157.8864, GNorm = 0.3495, lr_0 = 5.5313e-04
Loss = 2.9930e-02, PNorm = 157.9223, GNorm = 0.4360, lr_0 = 5.5275e-04
Loss = 2.7130e-02, PNorm = 157.9566, GNorm = 0.2237, lr_0 = 5.5237e-04
Loss = 3.9093e-02, PNorm = 157.9904, GNorm = 0.3272, lr_0 = 5.5199e-04
Loss = 2.8772e-02, PNorm = 158.0264, GNorm = 0.3116, lr_0 = 5.5162e-04
Loss = 3.0948e-02, PNorm = 158.0609, GNorm = 0.4582, lr_0 = 5.5124e-04
Loss = 2.8070e-02, PNorm = 158.0935, GNorm = 0.6273, lr_0 = 5.5086e-04
Loss = 2.9652e-02, PNorm = 158.1253, GNorm = 0.3545, lr_0 = 5.5048e-04
Loss = 2.6538e-02, PNorm = 158.1510, GNorm = 0.4080, lr_0 = 5.5011e-04
Loss = 2.5228e-02, PNorm = 158.1840, GNorm = 0.3184, lr_0 = 5.4973e-04
Loss = 2.9897e-02, PNorm = 158.2165, GNorm = 0.6791, lr_0 = 5.4935e-04
Loss = 3.5016e-02, PNorm = 158.2529, GNorm = 0.4020, lr_0 = 5.4898e-04
Loss = 3.0310e-02, PNorm = 158.2862, GNorm = 0.4245, lr_0 = 5.4860e-04
Loss = 3.6378e-02, PNorm = 158.3257, GNorm = 0.3399, lr_0 = 5.4822e-04
Loss = 3.0782e-02, PNorm = 158.3616, GNorm = 0.2490, lr_0 = 5.4785e-04
Loss = 3.3824e-02, PNorm = 158.3934, GNorm = 0.3433, lr_0 = 5.4747e-04
Loss = 2.8859e-02, PNorm = 158.4294, GNorm = 0.2546, lr_0 = 5.4710e-04
Loss = 4.1334e-02, PNorm = 158.4655, GNorm = 0.3725, lr_0 = 5.4672e-04
Loss = 3.0922e-02, PNorm = 158.5060, GNorm = 0.4706, lr_0 = 5.4635e-04
Loss = 3.2499e-02, PNorm = 158.5431, GNorm = 0.2815, lr_0 = 5.4597e-04
Loss = 3.3668e-02, PNorm = 158.5796, GNorm = 0.3221, lr_0 = 5.4560e-04
Loss = 3.1320e-02, PNorm = 158.6153, GNorm = 0.3633, lr_0 = 5.4523e-04
Loss = 3.1543e-02, PNorm = 158.6524, GNorm = 0.6307, lr_0 = 5.4485e-04
Loss = 2.8899e-02, PNorm = 158.6900, GNorm = 0.8428, lr_0 = 5.4448e-04
Loss = 2.9521e-02, PNorm = 158.7271, GNorm = 0.6143, lr_0 = 5.4411e-04
Loss = 3.6154e-02, PNorm = 158.7623, GNorm = 0.4502, lr_0 = 5.4373e-04
Loss = 3.6061e-02, PNorm = 158.7956, GNorm = 0.4535, lr_0 = 5.4336e-04
Loss = 3.4445e-02, PNorm = 158.8337, GNorm = 0.5524, lr_0 = 5.4299e-04
Loss = 2.9804e-02, PNorm = 158.8789, GNorm = 0.4429, lr_0 = 5.4262e-04
Loss = 3.5342e-02, PNorm = 158.9199, GNorm = 0.2672, lr_0 = 5.4225e-04
Loss = 3.2830e-02, PNorm = 158.9600, GNorm = 0.5974, lr_0 = 5.4187e-04
Loss = 4.0031e-02, PNorm = 159.0002, GNorm = 0.4163, lr_0 = 5.4150e-04
Loss = 3.3905e-02, PNorm = 159.0467, GNorm = 0.4021, lr_0 = 5.4113e-04
Loss = 3.0149e-02, PNorm = 159.0919, GNorm = 0.5644, lr_0 = 5.4076e-04
Loss = 3.0767e-02, PNorm = 159.1327, GNorm = 0.6585, lr_0 = 5.4039e-04
Loss = 3.3271e-02, PNorm = 159.1676, GNorm = 0.6362, lr_0 = 5.4002e-04
Loss = 2.9287e-02, PNorm = 159.2108, GNorm = 0.4633, lr_0 = 5.3965e-04
Loss = 2.5783e-02, PNorm = 159.2503, GNorm = 0.2750, lr_0 = 5.3928e-04
Loss = 4.8842e-02, PNorm = 159.2956, GNorm = 0.7867, lr_0 = 5.3891e-04
Loss = 4.4641e-02, PNorm = 159.3392, GNorm = 0.4473, lr_0 = 5.3854e-04
Loss = 3.9990e-02, PNorm = 159.3883, GNorm = 0.6190, lr_0 = 5.3817e-04
Loss = 3.3192e-02, PNorm = 159.4336, GNorm = 1.1021, lr_0 = 5.3781e-04
Loss = 3.3088e-02, PNorm = 159.4777, GNorm = 0.5795, lr_0 = 5.3744e-04
Loss = 2.9666e-02, PNorm = 159.5186, GNorm = 0.5530, lr_0 = 5.3707e-04
Loss = 4.2621e-02, PNorm = 159.5586, GNorm = 0.2656, lr_0 = 5.3670e-04
Loss = 3.5570e-02, PNorm = 159.5942, GNorm = 0.4421, lr_0 = 5.3633e-04
Loss = 2.9220e-02, PNorm = 159.6351, GNorm = 0.4762, lr_0 = 5.3597e-04
Loss = 3.6837e-02, PNorm = 159.6764, GNorm = 0.3400, lr_0 = 5.3560e-04
Loss = 2.6380e-02, PNorm = 159.7242, GNorm = 0.4150, lr_0 = 5.3523e-04
Loss = 2.8083e-02, PNorm = 159.7657, GNorm = 0.3416, lr_0 = 5.3486e-04
Loss = 4.8099e-02, PNorm = 159.8075, GNorm = 0.3106, lr_0 = 5.3450e-04
Loss = 3.2503e-02, PNorm = 159.8456, GNorm = 0.3395, lr_0 = 5.3413e-04
Loss = 3.5319e-02, PNorm = 159.8820, GNorm = 0.7490, lr_0 = 5.3377e-04
Loss = 3.3546e-02, PNorm = 159.9168, GNorm = 0.6208, lr_0 = 5.3340e-04
Loss = 2.5526e-02, PNorm = 159.9495, GNorm = 0.5124, lr_0 = 5.3304e-04
Loss = 3.4437e-02, PNorm = 159.9845, GNorm = 0.1474, lr_0 = 5.3267e-04
Loss = 3.0355e-02, PNorm = 160.0264, GNorm = 0.2564, lr_0 = 5.3231e-04
Loss = 3.9697e-02, PNorm = 160.0675, GNorm = 0.6022, lr_0 = 5.3194e-04
Loss = 3.4191e-02, PNorm = 160.1070, GNorm = 0.6427, lr_0 = 5.3158e-04
Loss = 3.4523e-02, PNorm = 160.1515, GNorm = 0.3560, lr_0 = 5.3121e-04
Loss = 3.6469e-02, PNorm = 160.1972, GNorm = 0.9048, lr_0 = 5.3085e-04
Loss = 3.0701e-02, PNorm = 160.2406, GNorm = 0.3912, lr_0 = 5.3048e-04
Loss = 3.0762e-02, PNorm = 160.2808, GNorm = 0.5719, lr_0 = 5.3012e-04
Loss = 2.8272e-02, PNorm = 160.3218, GNorm = 0.7221, lr_0 = 5.2976e-04
Loss = 2.7778e-02, PNorm = 160.3645, GNorm = 0.4005, lr_0 = 5.2939e-04
Loss = 2.6357e-02, PNorm = 160.4057, GNorm = 0.3323, lr_0 = 5.2903e-04
Loss = 3.4154e-02, PNorm = 160.4424, GNorm = 0.3937, lr_0 = 5.2867e-04
Loss = 3.2009e-02, PNorm = 160.4791, GNorm = 0.5913, lr_0 = 5.2831e-04
Loss = 3.9211e-02, PNorm = 160.5213, GNorm = 0.6869, lr_0 = 5.2795e-04
Loss = 3.1621e-02, PNorm = 160.5656, GNorm = 0.4330, lr_0 = 5.2758e-04
Loss = 2.9985e-02, PNorm = 160.6053, GNorm = 0.4985, lr_0 = 5.2722e-04
Loss = 2.7387e-02, PNorm = 160.6442, GNorm = 0.2263, lr_0 = 5.2686e-04
Loss = 3.7650e-02, PNorm = 160.6802, GNorm = 0.5180, lr_0 = 5.2650e-04
Loss = 3.4287e-02, PNorm = 160.7179, GNorm = 0.3710, lr_0 = 5.2614e-04
Loss = 3.8277e-02, PNorm = 160.7527, GNorm = 0.5654, lr_0 = 5.2578e-04
Loss = 3.6518e-02, PNorm = 160.7981, GNorm = 0.5496, lr_0 = 5.2542e-04
Loss = 3.9043e-02, PNorm = 160.8431, GNorm = 0.4334, lr_0 = 5.2506e-04
Loss = 3.4079e-02, PNorm = 160.8873, GNorm = 0.8508, lr_0 = 5.2470e-04
Loss = 3.7300e-02, PNorm = 160.9395, GNorm = 0.3438, lr_0 = 5.2434e-04
Loss = 2.8775e-02, PNorm = 160.9873, GNorm = 0.3244, lr_0 = 5.2398e-04
Loss = 2.8282e-02, PNorm = 161.0258, GNorm = 0.3388, lr_0 = 5.2362e-04
Loss = 3.0992e-02, PNorm = 161.0652, GNorm = 0.3109, lr_0 = 5.2326e-04
Loss = 3.8061e-02, PNorm = 161.1053, GNorm = 0.3560, lr_0 = 5.2290e-04
Loss = 3.3209e-02, PNorm = 161.1485, GNorm = 0.3451, lr_0 = 5.2255e-04
Loss = 4.5298e-02, PNorm = 161.1906, GNorm = 0.4982, lr_0 = 5.2219e-04
Loss = 3.3092e-02, PNorm = 161.2353, GNorm = 0.6898, lr_0 = 5.2183e-04
Loss = 3.9130e-02, PNorm = 161.2777, GNorm = 0.2744, lr_0 = 5.2147e-04
Loss = 3.1365e-02, PNorm = 161.3181, GNorm = 0.7654, lr_0 = 5.2112e-04
Loss = 3.0322e-02, PNorm = 161.3575, GNorm = 0.8496, lr_0 = 5.2076e-04
Loss = 5.1850e-02, PNorm = 161.3997, GNorm = 0.6427, lr_0 = 5.2040e-04
Loss = 3.5551e-02, PNorm = 161.4384, GNorm = 0.6805, lr_0 = 5.2005e-04
Loss = 3.8351e-02, PNorm = 161.4792, GNorm = 0.4176, lr_0 = 5.1969e-04
Loss = 3.6864e-02, PNorm = 161.5233, GNorm = 0.4851, lr_0 = 5.1933e-04
Loss = 3.0826e-02, PNorm = 161.5591, GNorm = 0.3583, lr_0 = 5.1898e-04
Loss = 3.6813e-02, PNorm = 161.6061, GNorm = 0.2990, lr_0 = 5.1862e-04
Loss = 3.3210e-02, PNorm = 161.6497, GNorm = 0.4005, lr_0 = 5.1827e-04
Loss = 4.0239e-02, PNorm = 161.6905, GNorm = 0.2797, lr_0 = 5.1791e-04
Validation mae = 0.122290
Epoch 10
Loss = 2.6143e-02, PNorm = 161.7276, GNorm = 0.6977, lr_0 = 5.1756e-04
Loss = 2.9350e-02, PNorm = 161.7596, GNorm = 0.4046, lr_0 = 5.1720e-04
Loss = 3.5555e-02, PNorm = 161.7929, GNorm = 0.2745, lr_0 = 5.1685e-04
Loss = 2.8505e-02, PNorm = 161.8158, GNorm = 0.5484, lr_0 = 5.1649e-04
Loss = 2.8774e-02, PNorm = 161.8521, GNorm = 0.3321, lr_0 = 5.1614e-04
Loss = 3.1370e-02, PNorm = 161.8796, GNorm = 0.3325, lr_0 = 5.1579e-04
Loss = 2.6548e-02, PNorm = 161.9078, GNorm = 0.1557, lr_0 = 5.1543e-04
Loss = 2.6414e-02, PNorm = 161.9348, GNorm = 0.3418, lr_0 = 5.1508e-04
Loss = 2.6496e-02, PNorm = 161.9612, GNorm = 0.4703, lr_0 = 5.1473e-04
Loss = 3.4962e-02, PNorm = 161.9861, GNorm = 0.3415, lr_0 = 5.1437e-04
Loss = 2.3992e-02, PNorm = 162.0127, GNorm = 0.3222, lr_0 = 5.1402e-04
Loss = 2.9359e-02, PNorm = 162.0362, GNorm = 0.4671, lr_0 = 5.1367e-04
Loss = 2.4009e-02, PNorm = 162.0642, GNorm = 0.2071, lr_0 = 5.1332e-04
Loss = 2.5648e-02, PNorm = 162.0913, GNorm = 0.6529, lr_0 = 5.1297e-04
Loss = 2.6716e-02, PNorm = 162.1201, GNorm = 0.6226, lr_0 = 5.1262e-04
Loss = 2.8475e-02, PNorm = 162.1491, GNorm = 0.2539, lr_0 = 5.1226e-04
Loss = 3.2436e-02, PNorm = 162.1789, GNorm = 0.2328, lr_0 = 5.1191e-04
Loss = 2.3861e-02, PNorm = 162.2050, GNorm = 0.2482, lr_0 = 5.1156e-04
Loss = 2.4186e-02, PNorm = 162.2333, GNorm = 0.2647, lr_0 = 5.1121e-04
Loss = 2.7859e-02, PNorm = 162.2559, GNorm = 0.3818, lr_0 = 5.1086e-04
Loss = 3.0114e-02, PNorm = 162.2827, GNorm = 0.3933, lr_0 = 5.1051e-04
Loss = 2.5301e-02, PNorm = 162.3094, GNorm = 0.4576, lr_0 = 5.1016e-04
Loss = 3.7031e-02, PNorm = 162.3432, GNorm = 0.2824, lr_0 = 5.0981e-04
Loss = 2.3746e-02, PNorm = 162.3755, GNorm = 0.2496, lr_0 = 5.0946e-04
Loss = 2.6786e-02, PNorm = 162.4042, GNorm = 0.3654, lr_0 = 5.0911e-04
Loss = 2.7502e-02, PNorm = 162.4330, GNorm = 0.3261, lr_0 = 5.0877e-04
Loss = 2.9168e-02, PNorm = 162.4598, GNorm = 0.4220, lr_0 = 5.0842e-04
Loss = 3.0878e-02, PNorm = 162.4916, GNorm = 0.6814, lr_0 = 5.0807e-04
Loss = 2.3991e-02, PNorm = 162.5198, GNorm = 0.2524, lr_0 = 5.0772e-04
Loss = 2.4224e-02, PNorm = 162.5488, GNorm = 0.3814, lr_0 = 5.0737e-04
Loss = 2.3566e-02, PNorm = 162.5748, GNorm = 0.2665, lr_0 = 5.0703e-04
Loss = 3.0207e-02, PNorm = 162.6029, GNorm = 0.5830, lr_0 = 5.0668e-04
Loss = 2.4968e-02, PNorm = 162.6357, GNorm = 0.6325, lr_0 = 5.0633e-04
Loss = 2.6680e-02, PNorm = 162.6723, GNorm = 0.2398, lr_0 = 5.0598e-04
Loss = 2.7784e-02, PNorm = 162.7034, GNorm = 0.2940, lr_0 = 5.0564e-04
Loss = 2.8246e-02, PNorm = 162.7358, GNorm = 0.3907, lr_0 = 5.0529e-04
Loss = 2.5904e-02, PNorm = 162.7687, GNorm = 0.3870, lr_0 = 5.0494e-04
Loss = 2.3527e-02, PNorm = 162.8005, GNorm = 0.4081, lr_0 = 5.0460e-04
Loss = 2.4499e-02, PNorm = 162.8324, GNorm = 0.1657, lr_0 = 5.0425e-04
Loss = 3.0387e-02, PNorm = 162.8606, GNorm = 0.3040, lr_0 = 5.0391e-04
Loss = 3.6401e-02, PNorm = 162.8935, GNorm = 0.2240, lr_0 = 5.0356e-04
Loss = 2.6835e-02, PNorm = 162.9236, GNorm = 0.3644, lr_0 = 5.0322e-04
Loss = 3.3711e-02, PNorm = 162.9580, GNorm = 0.3901, lr_0 = 5.0287e-04
Loss = 2.4841e-02, PNorm = 162.9933, GNorm = 0.3953, lr_0 = 5.0253e-04
Loss = 3.4385e-02, PNorm = 163.0230, GNorm = 0.2225, lr_0 = 5.0218e-04
Loss = 3.4591e-02, PNorm = 163.0488, GNorm = 0.3435, lr_0 = 5.0184e-04
Loss = 3.1193e-02, PNorm = 163.0819, GNorm = 0.4237, lr_0 = 5.0150e-04
Loss = 2.7649e-02, PNorm = 163.1127, GNorm = 0.5128, lr_0 = 5.0115e-04
Loss = 2.7159e-02, PNorm = 163.1472, GNorm = 0.6214, lr_0 = 5.0081e-04
Loss = 3.8847e-02, PNorm = 163.1747, GNorm = 0.3956, lr_0 = 5.0047e-04
Loss = 2.4201e-02, PNorm = 163.2077, GNorm = 0.4578, lr_0 = 5.0012e-04
Loss = 3.2389e-02, PNorm = 163.2377, GNorm = 0.2450, lr_0 = 4.9978e-04
Loss = 3.2367e-02, PNorm = 163.2686, GNorm = 0.2423, lr_0 = 4.9944e-04
Loss = 2.8824e-02, PNorm = 163.2994, GNorm = 0.4320, lr_0 = 4.9910e-04
Loss = 3.3640e-02, PNorm = 163.3285, GNorm = 0.2643, lr_0 = 4.9875e-04
Loss = 2.8074e-02, PNorm = 163.3628, GNorm = 0.3921, lr_0 = 4.9841e-04
Loss = 2.7281e-02, PNorm = 163.3946, GNorm = 0.5074, lr_0 = 4.9807e-04
Loss = 2.7383e-02, PNorm = 163.4311, GNorm = 0.3575, lr_0 = 4.9773e-04
Loss = 2.5340e-02, PNorm = 163.4617, GNorm = 0.2783, lr_0 = 4.9739e-04
Loss = 2.1145e-02, PNorm = 163.4914, GNorm = 0.2801, lr_0 = 4.9705e-04
Loss = 2.9200e-02, PNorm = 163.5221, GNorm = 0.7475, lr_0 = 4.9671e-04
Loss = 2.4472e-02, PNorm = 163.5510, GNorm = 0.4047, lr_0 = 4.9637e-04
Loss = 3.3734e-02, PNorm = 163.5835, GNorm = 0.3107, lr_0 = 4.9603e-04
Loss = 2.4449e-02, PNorm = 163.6154, GNorm = 0.4404, lr_0 = 4.9569e-04
Loss = 2.1757e-02, PNorm = 163.6471, GNorm = 0.3001, lr_0 = 4.9535e-04
Loss = 2.5196e-02, PNorm = 163.6807, GNorm = 0.3968, lr_0 = 4.9501e-04
Loss = 3.2208e-02, PNorm = 163.7091, GNorm = 0.4369, lr_0 = 4.9467e-04
Loss = 2.3433e-02, PNorm = 163.7443, GNorm = 0.4136, lr_0 = 4.9433e-04
Loss = 2.1761e-02, PNorm = 163.7798, GNorm = 0.1797, lr_0 = 4.9399e-04
Loss = 3.4084e-02, PNorm = 163.8163, GNorm = 0.6351, lr_0 = 4.9365e-04
Loss = 3.6779e-02, PNorm = 163.8491, GNorm = 0.2583, lr_0 = 4.9332e-04
Loss = 2.6800e-02, PNorm = 163.8820, GNorm = 0.2863, lr_0 = 4.9298e-04
Loss = 3.0124e-02, PNorm = 163.9190, GNorm = 0.3934, lr_0 = 4.9264e-04
Loss = 2.5650e-02, PNorm = 163.9565, GNorm = 0.3608, lr_0 = 4.9230e-04
Loss = 2.8322e-02, PNorm = 163.9946, GNorm = 0.3690, lr_0 = 4.9197e-04
Loss = 2.9047e-02, PNorm = 164.0303, GNorm = 0.5034, lr_0 = 4.9163e-04
Loss = 2.5565e-02, PNorm = 164.0652, GNorm = 0.5363, lr_0 = 4.9129e-04
Loss = 3.1851e-02, PNorm = 164.0962, GNorm = 0.2885, lr_0 = 4.9095e-04
Loss = 2.5606e-02, PNorm = 164.1240, GNorm = 0.2295, lr_0 = 4.9062e-04
Loss = 3.1487e-02, PNorm = 164.1553, GNorm = 0.2332, lr_0 = 4.9028e-04
Loss = 2.0735e-02, PNorm = 164.1889, GNorm = 0.2469, lr_0 = 4.8995e-04
Loss = 2.5439e-02, PNorm = 164.2205, GNorm = 0.5923, lr_0 = 4.8961e-04
Loss = 2.7299e-02, PNorm = 164.2590, GNorm = 0.6303, lr_0 = 4.8928e-04
Loss = 2.3741e-02, PNorm = 164.2966, GNorm = 0.3190, lr_0 = 4.8894e-04
Loss = 2.6828e-02, PNorm = 164.3304, GNorm = 0.2885, lr_0 = 4.8861e-04
Loss = 2.8261e-02, PNorm = 164.3640, GNorm = 0.3286, lr_0 = 4.8827e-04
Loss = 3.0142e-02, PNorm = 164.3969, GNorm = 0.5755, lr_0 = 4.8794e-04
Loss = 2.6952e-02, PNorm = 164.4298, GNorm = 0.2765, lr_0 = 4.8760e-04
Loss = 3.0717e-02, PNorm = 164.4659, GNorm = 0.6583, lr_0 = 4.8727e-04
Loss = 2.6745e-02, PNorm = 164.4993, GNorm = 0.4485, lr_0 = 4.8693e-04
Loss = 2.1520e-02, PNorm = 164.5310, GNorm = 0.3612, lr_0 = 4.8660e-04
Loss = 2.3721e-02, PNorm = 164.5619, GNorm = 0.3014, lr_0 = 4.8627e-04
Loss = 3.5662e-02, PNorm = 164.5934, GNorm = 0.3809, lr_0 = 4.8593e-04
Loss = 2.7820e-02, PNorm = 164.6301, GNorm = 0.6095, lr_0 = 4.8560e-04
Loss = 2.5714e-02, PNorm = 164.6679, GNorm = 0.3729, lr_0 = 4.8527e-04
Loss = 2.1075e-02, PNorm = 164.7077, GNorm = 0.2990, lr_0 = 4.8494e-04
Loss = 3.2303e-02, PNorm = 164.7399, GNorm = 0.7218, lr_0 = 4.8460e-04
Loss = 2.7318e-02, PNorm = 164.7771, GNorm = 0.2790, lr_0 = 4.8427e-04
Loss = 2.3264e-02, PNorm = 164.8128, GNorm = 0.3045, lr_0 = 4.8394e-04
Loss = 3.6283e-02, PNorm = 164.8485, GNorm = 0.4871, lr_0 = 4.8361e-04
Loss = 2.5370e-02, PNorm = 164.8805, GNorm = 0.5320, lr_0 = 4.8328e-04
Loss = 2.6803e-02, PNorm = 164.9181, GNorm = 0.2173, lr_0 = 4.8295e-04
Loss = 2.6355e-02, PNorm = 164.9480, GNorm = 0.2870, lr_0 = 4.8262e-04
Loss = 3.1049e-02, PNorm = 164.9787, GNorm = 0.2615, lr_0 = 4.8228e-04
Loss = 2.7127e-02, PNorm = 165.0134, GNorm = 0.2298, lr_0 = 4.8195e-04
Loss = 2.7178e-02, PNorm = 165.0476, GNorm = 0.2636, lr_0 = 4.8162e-04
Loss = 2.7857e-02, PNorm = 165.0827, GNorm = 0.4310, lr_0 = 4.8129e-04
Loss = 2.8923e-02, PNorm = 165.1176, GNorm = 0.2204, lr_0 = 4.8096e-04
Loss = 2.8325e-02, PNorm = 165.1514, GNorm = 0.2157, lr_0 = 4.8064e-04
Loss = 3.1928e-02, PNorm = 165.1879, GNorm = 0.7273, lr_0 = 4.8031e-04
Loss = 2.9015e-02, PNorm = 165.2258, GNorm = 0.3262, lr_0 = 4.7998e-04
Loss = 1.9009e-02, PNorm = 165.2631, GNorm = 0.2697, lr_0 = 4.7965e-04
Loss = 2.6211e-02, PNorm = 165.2981, GNorm = 0.4048, lr_0 = 4.7932e-04
Loss = 2.3091e-02, PNorm = 165.3351, GNorm = 0.4756, lr_0 = 4.7899e-04
Loss = 3.3990e-02, PNorm = 165.3756, GNorm = 0.6498, lr_0 = 4.7866e-04
Loss = 2.4681e-02, PNorm = 165.4109, GNorm = 0.4693, lr_0 = 4.7833e-04
Loss = 2.5518e-02, PNorm = 165.4540, GNorm = 0.3968, lr_0 = 4.7801e-04
Loss = 2.8141e-02, PNorm = 165.4903, GNorm = 0.6267, lr_0 = 4.7768e-04
Loss = 2.6188e-02, PNorm = 165.5278, GNorm = 0.2413, lr_0 = 4.7735e-04
Loss = 2.9225e-02, PNorm = 165.5636, GNorm = 0.8234, lr_0 = 4.7703e-04
Validation mae = 0.122028
Epoch 11
Loss = 2.5581e-02, PNorm = 165.5940, GNorm = 0.2404, lr_0 = 4.7670e-04
Loss = 2.3318e-02, PNorm = 165.6189, GNorm = 0.2796, lr_0 = 4.7637e-04
Loss = 2.3087e-02, PNorm = 165.6399, GNorm = 0.4963, lr_0 = 4.7605e-04
Loss = 2.9154e-02, PNorm = 165.6598, GNorm = 0.5406, lr_0 = 4.7572e-04
Loss = 2.4831e-02, PNorm = 165.6875, GNorm = 0.3079, lr_0 = 4.7539e-04
Loss = 2.8432e-02, PNorm = 165.7132, GNorm = 1.3810, lr_0 = 4.7507e-04
Loss = 1.8723e-02, PNorm = 165.7377, GNorm = 0.3101, lr_0 = 4.7474e-04
Loss = 2.3860e-02, PNorm = 165.7568, GNorm = 0.1752, lr_0 = 4.7442e-04
Loss = 2.3924e-02, PNorm = 165.7823, GNorm = 0.3964, lr_0 = 4.7409e-04
Loss = 1.8620e-02, PNorm = 165.8074, GNorm = 0.2117, lr_0 = 4.7377e-04
Loss = 2.1892e-02, PNorm = 165.8284, GNorm = 0.3136, lr_0 = 4.7344e-04
Loss = 2.1186e-02, PNorm = 165.8519, GNorm = 0.2425, lr_0 = 4.7312e-04
Loss = 2.3742e-02, PNorm = 165.8718, GNorm = 0.2958, lr_0 = 4.7279e-04
Loss = 2.5549e-02, PNorm = 165.8946, GNorm = 0.4131, lr_0 = 4.7247e-04
Loss = 2.1642e-02, PNorm = 165.9170, GNorm = 0.3355, lr_0 = 4.7215e-04
Loss = 2.3913e-02, PNorm = 165.9330, GNorm = 0.4259, lr_0 = 4.7182e-04
Loss = 2.7916e-02, PNorm = 165.9582, GNorm = 0.6875, lr_0 = 4.7150e-04
Loss = 3.6470e-02, PNorm = 165.9916, GNorm = 0.3195, lr_0 = 4.7118e-04
Loss = 2.1306e-02, PNorm = 166.0204, GNorm = 0.2576, lr_0 = 4.7085e-04
Loss = 2.0292e-02, PNorm = 166.0428, GNorm = 0.5516, lr_0 = 4.7053e-04
Loss = 2.1808e-02, PNorm = 166.0642, GNorm = 0.3599, lr_0 = 4.7021e-04
Loss = 2.6459e-02, PNorm = 166.0813, GNorm = 0.1943, lr_0 = 4.6989e-04
Loss = 2.2132e-02, PNorm = 166.1017, GNorm = 0.7255, lr_0 = 4.6957e-04
Loss = 2.0737e-02, PNorm = 166.1248, GNorm = 0.3218, lr_0 = 4.6924e-04
Loss = 2.3107e-02, PNorm = 166.1488, GNorm = 0.3391, lr_0 = 4.6892e-04
Loss = 4.9786e-02, PNorm = 166.1746, GNorm = 0.2721, lr_0 = 4.6860e-04
Loss = 1.6329e-02, PNorm = 166.2015, GNorm = 0.3369, lr_0 = 4.6828e-04
Loss = 2.4895e-02, PNorm = 166.2215, GNorm = 0.4211, lr_0 = 4.6796e-04
Loss = 2.4151e-02, PNorm = 166.2456, GNorm = 0.3344, lr_0 = 4.6764e-04
Loss = 2.1975e-02, PNorm = 166.2715, GNorm = 0.2110, lr_0 = 4.6732e-04
Loss = 2.4228e-02, PNorm = 166.2937, GNorm = 0.3032, lr_0 = 4.6700e-04
Loss = 2.2497e-02, PNorm = 166.3177, GNorm = 0.2387, lr_0 = 4.6668e-04
Loss = 2.0839e-02, PNorm = 166.3432, GNorm = 0.2541, lr_0 = 4.6636e-04
Loss = 1.9086e-02, PNorm = 166.3658, GNorm = 0.2641, lr_0 = 4.6604e-04
Loss = 2.4033e-02, PNorm = 166.3887, GNorm = 0.5409, lr_0 = 4.6572e-04
Loss = 2.8062e-02, PNorm = 166.4119, GNorm = 0.6706, lr_0 = 4.6540e-04
Loss = 2.6072e-02, PNorm = 166.4382, GNorm = 0.4274, lr_0 = 4.6508e-04
Loss = 2.0679e-02, PNorm = 166.4647, GNorm = 0.3491, lr_0 = 4.6476e-04
Loss = 1.6408e-02, PNorm = 166.4920, GNorm = 0.1663, lr_0 = 4.6445e-04
Loss = 2.2821e-02, PNorm = 166.5134, GNorm = 0.4415, lr_0 = 4.6413e-04
Loss = 2.5482e-02, PNorm = 166.5384, GNorm = 0.3592, lr_0 = 4.6381e-04
Loss = 1.5323e-02, PNorm = 166.5625, GNorm = 0.1954, lr_0 = 4.6349e-04
Loss = 1.7387e-02, PNorm = 166.5834, GNorm = 0.3148, lr_0 = 4.6317e-04
Loss = 2.1687e-02, PNorm = 166.6085, GNorm = 0.1822, lr_0 = 4.6286e-04
Loss = 2.1922e-02, PNorm = 166.6309, GNorm = 0.2788, lr_0 = 4.6254e-04
Loss = 1.7524e-02, PNorm = 166.6555, GNorm = 0.3239, lr_0 = 4.6222e-04
Loss = 2.0227e-02, PNorm = 166.6784, GNorm = 0.1724, lr_0 = 4.6191e-04
Loss = 2.3937e-02, PNorm = 166.7019, GNorm = 0.2917, lr_0 = 4.6159e-04
Loss = 2.4514e-02, PNorm = 166.7283, GNorm = 0.4002, lr_0 = 4.6127e-04
Loss = 1.9814e-02, PNorm = 166.7545, GNorm = 0.3955, lr_0 = 4.6096e-04
Loss = 2.1489e-02, PNorm = 166.7848, GNorm = 0.4346, lr_0 = 4.6064e-04
Loss = 2.0923e-02, PNorm = 166.8133, GNorm = 0.2696, lr_0 = 4.6033e-04
Loss = 1.7535e-02, PNorm = 166.8388, GNorm = 0.3116, lr_0 = 4.6001e-04
Loss = 2.1495e-02, PNorm = 166.8642, GNorm = 0.5425, lr_0 = 4.5970e-04
Loss = 2.2283e-02, PNorm = 166.8897, GNorm = 0.2455, lr_0 = 4.5938e-04
Loss = 2.2288e-02, PNorm = 166.9141, GNorm = 0.3337, lr_0 = 4.5907e-04
Loss = 2.2704e-02, PNorm = 166.9372, GNorm = 0.2816, lr_0 = 4.5875e-04
Loss = 1.9993e-02, PNorm = 166.9648, GNorm = 0.2883, lr_0 = 4.5844e-04
Loss = 1.9894e-02, PNorm = 166.9881, GNorm = 0.3532, lr_0 = 4.5812e-04
Loss = 1.9841e-02, PNorm = 167.0102, GNorm = 0.2302, lr_0 = 4.5781e-04
Loss = 2.3808e-02, PNorm = 167.0367, GNorm = 0.4253, lr_0 = 4.5750e-04
Loss = 2.7141e-02, PNorm = 167.0658, GNorm = 0.7919, lr_0 = 4.5718e-04
Loss = 2.3996e-02, PNorm = 167.0968, GNorm = 0.3286, lr_0 = 4.5687e-04
Loss = 2.0812e-02, PNorm = 167.1294, GNorm = 0.2891, lr_0 = 4.5656e-04
Loss = 2.4727e-02, PNorm = 167.1577, GNorm = 0.4217, lr_0 = 4.5624e-04
Loss = 1.9615e-02, PNorm = 167.1867, GNorm = 0.5317, lr_0 = 4.5593e-04
Loss = 2.1592e-02, PNorm = 167.2166, GNorm = 0.3434, lr_0 = 4.5562e-04
Loss = 2.9011e-02, PNorm = 167.2479, GNorm = 0.2921, lr_0 = 4.5531e-04
Loss = 2.2935e-02, PNorm = 167.2761, GNorm = 0.4377, lr_0 = 4.5499e-04
Loss = 2.1489e-02, PNorm = 167.3043, GNorm = 0.5361, lr_0 = 4.5468e-04
Loss = 2.0409e-02, PNorm = 167.3305, GNorm = 0.3957, lr_0 = 4.5437e-04
Loss = 2.5052e-02, PNorm = 167.3585, GNorm = 0.2468, lr_0 = 4.5406e-04
Loss = 2.5367e-02, PNorm = 167.3850, GNorm = 0.7933, lr_0 = 4.5375e-04
Loss = 1.7312e-02, PNorm = 167.4126, GNorm = 0.4719, lr_0 = 4.5344e-04
Loss = 1.8928e-02, PNorm = 167.4361, GNorm = 0.2729, lr_0 = 4.5313e-04
Loss = 2.7087e-02, PNorm = 167.4627, GNorm = 0.4157, lr_0 = 4.5282e-04
Loss = 2.1172e-02, PNorm = 167.4879, GNorm = 0.2307, lr_0 = 4.5251e-04
Loss = 3.6044e-02, PNorm = 167.5195, GNorm = 0.5113, lr_0 = 4.5220e-04
Loss = 2.5724e-02, PNorm = 167.5502, GNorm = 0.4159, lr_0 = 4.5189e-04
Loss = 2.3695e-02, PNorm = 167.5850, GNorm = 0.3191, lr_0 = 4.5158e-04
Loss = 2.3837e-02, PNorm = 167.6146, GNorm = 0.4017, lr_0 = 4.5127e-04
Loss = 2.4705e-02, PNorm = 167.6435, GNorm = 0.4127, lr_0 = 4.5096e-04
Loss = 2.5155e-02, PNorm = 167.6701, GNorm = 0.5457, lr_0 = 4.5065e-04
Loss = 2.8054e-02, PNorm = 167.6976, GNorm = 0.2812, lr_0 = 4.5034e-04
Loss = 1.7318e-02, PNorm = 167.7220, GNorm = 0.4164, lr_0 = 4.5003e-04
Loss = 2.5486e-02, PNorm = 167.7478, GNorm = 0.4190, lr_0 = 4.4972e-04
Loss = 2.5650e-02, PNorm = 167.7749, GNorm = 0.4639, lr_0 = 4.4942e-04
Loss = 1.9930e-02, PNorm = 167.8061, GNorm = 0.2631, lr_0 = 4.4911e-04
Loss = 1.9868e-02, PNorm = 167.8352, GNorm = 0.4466, lr_0 = 4.4880e-04
Loss = 2.0752e-02, PNorm = 167.8671, GNorm = 0.2646, lr_0 = 4.4849e-04
Loss = 2.1267e-02, PNorm = 167.8948, GNorm = 0.1747, lr_0 = 4.4819e-04
Loss = 1.8883e-02, PNorm = 167.9201, GNorm = 0.3410, lr_0 = 4.4788e-04
Loss = 3.3993e-02, PNorm = 167.9466, GNorm = 0.7076, lr_0 = 4.4757e-04
Loss = 2.2746e-02, PNorm = 167.9732, GNorm = 0.2453, lr_0 = 4.4727e-04
Loss = 1.7039e-02, PNorm = 168.0057, GNorm = 0.1633, lr_0 = 4.4696e-04
Loss = 1.7627e-02, PNorm = 168.0329, GNorm = 0.2499, lr_0 = 4.4665e-04
Loss = 1.9339e-02, PNorm = 168.0614, GNorm = 0.2964, lr_0 = 4.4635e-04
Loss = 2.3797e-02, PNorm = 168.0936, GNorm = 0.3960, lr_0 = 4.4604e-04
Loss = 2.5267e-02, PNorm = 168.1203, GNorm = 0.5324, lr_0 = 4.4574e-04
Loss = 2.4625e-02, PNorm = 168.1462, GNorm = 0.3078, lr_0 = 4.4543e-04
Loss = 1.9099e-02, PNorm = 168.1784, GNorm = 0.3556, lr_0 = 4.4513e-04
Loss = 2.2567e-02, PNorm = 168.2067, GNorm = 0.4625, lr_0 = 4.4482e-04
Loss = 3.3365e-02, PNorm = 168.2338, GNorm = 0.1998, lr_0 = 4.4452e-04
Loss = 2.7267e-02, PNorm = 168.2657, GNorm = 0.5975, lr_0 = 4.4421e-04
Loss = 1.8976e-02, PNorm = 168.2927, GNorm = 0.6874, lr_0 = 4.4391e-04
Loss = 2.5785e-02, PNorm = 168.3231, GNorm = 0.2411, lr_0 = 4.4360e-04
Loss = 2.5112e-02, PNorm = 168.3560, GNorm = 0.2189, lr_0 = 4.4330e-04
Loss = 2.7303e-02, PNorm = 168.3882, GNorm = 0.3143, lr_0 = 4.4299e-04
Loss = 2.5961e-02, PNorm = 168.4117, GNorm = 0.3485, lr_0 = 4.4269e-04
Loss = 2.2509e-02, PNorm = 168.4407, GNorm = 0.5454, lr_0 = 4.4239e-04
Loss = 2.7768e-02, PNorm = 168.4703, GNorm = 0.3057, lr_0 = 4.4209e-04
Loss = 2.0167e-02, PNorm = 168.5029, GNorm = 0.2265, lr_0 = 4.4178e-04
Loss = 2.4037e-02, PNorm = 168.5328, GNorm = 0.3171, lr_0 = 4.4148e-04
Loss = 2.7335e-02, PNorm = 168.5646, GNorm = 0.2663, lr_0 = 4.4118e-04
Loss = 3.1546e-02, PNorm = 168.5953, GNorm = 0.7163, lr_0 = 4.4088e-04
Loss = 2.1386e-02, PNorm = 168.6234, GNorm = 0.2916, lr_0 = 4.4057e-04
Loss = 2.6590e-02, PNorm = 168.6537, GNorm = 0.2329, lr_0 = 4.4027e-04
Loss = 2.2945e-02, PNorm = 168.6860, GNorm = 0.3785, lr_0 = 4.3997e-04
Loss = 2.9571e-02, PNorm = 168.7143, GNorm = 0.5106, lr_0 = 4.3967e-04
Loss = 2.3669e-02, PNorm = 168.7461, GNorm = 0.4709, lr_0 = 4.3937e-04
Validation mae = 0.122105
Epoch 12
Loss = 2.1645e-02, PNorm = 168.7699, GNorm = 0.2899, lr_0 = 4.3907e-04
Loss = 1.8353e-02, PNorm = 168.7892, GNorm = 0.3720, lr_0 = 4.3877e-04
Loss = 2.1377e-02, PNorm = 168.8114, GNorm = 0.4821, lr_0 = 4.3846e-04
Loss = 2.7777e-02, PNorm = 168.8316, GNorm = 2.2949, lr_0 = 4.3816e-04
Loss = 1.5927e-02, PNorm = 168.8596, GNorm = 0.3407, lr_0 = 4.3786e-04
Loss = 2.0314e-02, PNorm = 168.8826, GNorm = 0.1806, lr_0 = 4.3756e-04
Loss = 1.9255e-02, PNorm = 168.9034, GNorm = 0.3721, lr_0 = 4.3726e-04
Loss = 1.8419e-02, PNorm = 168.9223, GNorm = 0.3454, lr_0 = 4.3696e-04
Loss = 1.7835e-02, PNorm = 168.9404, GNorm = 0.3171, lr_0 = 4.3667e-04
Loss = 1.8038e-02, PNorm = 168.9570, GNorm = 0.2026, lr_0 = 4.3637e-04
Loss = 2.4935e-02, PNorm = 168.9759, GNorm = 0.3145, lr_0 = 4.3607e-04
Loss = 2.1812e-02, PNorm = 168.9952, GNorm = 0.2084, lr_0 = 4.3577e-04
Loss = 1.8542e-02, PNorm = 169.0123, GNorm = 0.4659, lr_0 = 4.3547e-04
Loss = 1.9285e-02, PNorm = 169.0336, GNorm = 0.2467, lr_0 = 4.3517e-04
Loss = 2.0171e-02, PNorm = 169.0553, GNorm = 0.3047, lr_0 = 4.3487e-04
Loss = 1.3734e-02, PNorm = 169.0760, GNorm = 0.3256, lr_0 = 4.3458e-04
Loss = 2.4113e-02, PNorm = 169.0952, GNorm = 0.9762, lr_0 = 4.3428e-04
Loss = 1.7036e-02, PNorm = 169.1171, GNorm = 0.1713, lr_0 = 4.3398e-04
Loss = 1.6402e-02, PNorm = 169.1371, GNorm = 0.2042, lr_0 = 4.3368e-04
Loss = 2.0991e-02, PNorm = 169.1580, GNorm = 1.0158, lr_0 = 4.3339e-04
Loss = 1.6162e-02, PNorm = 169.1751, GNorm = 0.2980, lr_0 = 4.3309e-04
Loss = 1.9163e-02, PNorm = 169.1960, GNorm = 0.5064, lr_0 = 4.3279e-04
Loss = 1.8272e-02, PNorm = 169.2160, GNorm = 0.2969, lr_0 = 4.3250e-04
Loss = 1.7005e-02, PNorm = 169.2351, GNorm = 0.7793, lr_0 = 4.3220e-04
Loss = 1.7668e-02, PNorm = 169.2550, GNorm = 0.2308, lr_0 = 4.3190e-04
Loss = 1.5105e-02, PNorm = 169.2748, GNorm = 0.2789, lr_0 = 4.3161e-04
Loss = 2.1571e-02, PNorm = 169.2975, GNorm = 0.2494, lr_0 = 4.3131e-04
Loss = 1.9510e-02, PNorm = 169.3159, GNorm = 0.4685, lr_0 = 4.3102e-04
Loss = 1.7652e-02, PNorm = 169.3351, GNorm = 0.1772, lr_0 = 4.3072e-04
Loss = 1.8902e-02, PNorm = 169.3547, GNorm = 0.2204, lr_0 = 4.3043e-04
Loss = 1.8121e-02, PNorm = 169.3735, GNorm = 0.2405, lr_0 = 4.3013e-04
Loss = 2.3083e-02, PNorm = 169.3931, GNorm = 0.4600, lr_0 = 4.2984e-04
Loss = 1.8088e-02, PNorm = 169.4149, GNorm = 0.2917, lr_0 = 4.2954e-04
Loss = 1.9745e-02, PNorm = 169.4389, GNorm = 0.6478, lr_0 = 4.2925e-04
Loss = 1.6808e-02, PNorm = 169.4610, GNorm = 0.2669, lr_0 = 4.2895e-04
Loss = 2.5391e-02, PNorm = 169.4833, GNorm = 0.2038, lr_0 = 4.2866e-04
Loss = 1.9680e-02, PNorm = 169.5064, GNorm = 0.3701, lr_0 = 4.2837e-04
Loss = 2.4939e-02, PNorm = 169.5242, GNorm = 0.2632, lr_0 = 4.2807e-04
Loss = 1.7607e-02, PNorm = 169.5427, GNorm = 0.4247, lr_0 = 4.2778e-04
Loss = 1.7213e-02, PNorm = 169.5628, GNorm = 0.2256, lr_0 = 4.2749e-04
Loss = 1.8726e-02, PNorm = 169.5801, GNorm = 0.3530, lr_0 = 4.2719e-04
Loss = 2.1592e-02, PNorm = 169.5988, GNorm = 0.2479, lr_0 = 4.2690e-04
Loss = 1.6095e-02, PNorm = 169.6234, GNorm = 0.3297, lr_0 = 4.2661e-04
Loss = 1.6918e-02, PNorm = 169.6469, GNorm = 0.2052, lr_0 = 4.2632e-04
Loss = 1.6829e-02, PNorm = 169.6681, GNorm = 0.2224, lr_0 = 4.2602e-04
Loss = 1.7192e-02, PNorm = 169.6896, GNorm = 0.3240, lr_0 = 4.2573e-04
Loss = 2.1501e-02, PNorm = 169.7120, GNorm = 0.1927, lr_0 = 4.2544e-04
Loss = 1.9081e-02, PNorm = 169.7321, GNorm = 0.3807, lr_0 = 4.2515e-04
Loss = 2.5794e-02, PNorm = 169.7528, GNorm = 0.1842, lr_0 = 4.2486e-04
Loss = 2.3267e-02, PNorm = 169.7806, GNorm = 0.3800, lr_0 = 4.2457e-04
Loss = 1.7541e-02, PNorm = 169.8049, GNorm = 0.3493, lr_0 = 4.2428e-04
Loss = 2.2990e-02, PNorm = 169.8262, GNorm = 0.4014, lr_0 = 4.2399e-04
Loss = 1.9883e-02, PNorm = 169.8445, GNorm = 0.8172, lr_0 = 4.2370e-04
Loss = 1.7376e-02, PNorm = 169.8698, GNorm = 0.6195, lr_0 = 4.2340e-04
Loss = 1.8723e-02, PNorm = 169.8919, GNorm = 0.5139, lr_0 = 4.2311e-04
Loss = 2.1309e-02, PNorm = 169.9148, GNorm = 0.4683, lr_0 = 4.2283e-04
Loss = 1.8841e-02, PNorm = 169.9360, GNorm = 0.5152, lr_0 = 4.2254e-04
Loss = 1.9519e-02, PNorm = 169.9588, GNorm = 0.2618, lr_0 = 4.2225e-04
Loss = 1.9234e-02, PNorm = 169.9827, GNorm = 0.3228, lr_0 = 4.2196e-04
Loss = 1.9397e-02, PNorm = 170.0087, GNorm = 0.3679, lr_0 = 4.2167e-04
Loss = 1.5329e-02, PNorm = 170.0338, GNorm = 0.4040, lr_0 = 4.2138e-04
Loss = 2.0240e-02, PNorm = 170.0544, GNorm = 0.6891, lr_0 = 4.2109e-04
Loss = 1.6543e-02, PNorm = 170.0782, GNorm = 0.2985, lr_0 = 4.2080e-04
Loss = 1.6224e-02, PNorm = 170.1013, GNorm = 0.2324, lr_0 = 4.2051e-04
Loss = 2.2807e-02, PNorm = 170.1256, GNorm = 0.2888, lr_0 = 4.2023e-04
Loss = 1.4538e-02, PNorm = 170.1493, GNorm = 0.2273, lr_0 = 4.1994e-04
Loss = 1.5912e-02, PNorm = 170.1741, GNorm = 0.2652, lr_0 = 4.1965e-04
Loss = 2.3566e-02, PNorm = 170.1945, GNorm = 0.3430, lr_0 = 4.1936e-04
Loss = 1.2714e-02, PNorm = 170.2149, GNorm = 0.3714, lr_0 = 4.1907e-04
Loss = 1.7312e-02, PNorm = 170.2352, GNorm = 0.6373, lr_0 = 4.1879e-04
Loss = 1.6241e-02, PNorm = 170.2568, GNorm = 0.2836, lr_0 = 4.1850e-04
Loss = 2.4801e-02, PNorm = 170.2777, GNorm = 0.1708, lr_0 = 4.1821e-04
Loss = 2.2099e-02, PNorm = 170.3070, GNorm = 0.2283, lr_0 = 4.1793e-04
Loss = 1.9576e-02, PNorm = 170.3342, GNorm = 0.2594, lr_0 = 4.1764e-04
Loss = 1.8847e-02, PNorm = 170.3583, GNorm = 0.4693, lr_0 = 4.1736e-04
Loss = 2.1292e-02, PNorm = 170.3853, GNorm = 0.8304, lr_0 = 4.1707e-04
Loss = 1.9796e-02, PNorm = 170.4056, GNorm = 0.2842, lr_0 = 4.1678e-04
Loss = 1.8975e-02, PNorm = 170.4261, GNorm = 0.3080, lr_0 = 4.1650e-04
Loss = 1.8056e-02, PNorm = 170.4503, GNorm = 0.3130, lr_0 = 4.1621e-04
Loss = 2.2628e-02, PNorm = 170.4740, GNorm = 0.2198, lr_0 = 4.1593e-04
Loss = 3.1455e-02, PNorm = 170.4978, GNorm = 0.4156, lr_0 = 4.1564e-04
Loss = 1.8874e-02, PNorm = 170.5239, GNorm = 0.5936, lr_0 = 4.1536e-04
Loss = 1.9389e-02, PNorm = 170.5504, GNorm = 0.3603, lr_0 = 4.1507e-04
Loss = 1.7030e-02, PNorm = 170.5772, GNorm = 0.4691, lr_0 = 4.1479e-04
Loss = 1.4732e-02, PNorm = 170.5999, GNorm = 0.5522, lr_0 = 4.1450e-04
Loss = 1.6569e-02, PNorm = 170.6230, GNorm = 0.6278, lr_0 = 4.1422e-04
Loss = 1.7224e-02, PNorm = 170.6456, GNorm = 0.4544, lr_0 = 4.1394e-04
Loss = 1.7732e-02, PNorm = 170.6655, GNorm = 0.3663, lr_0 = 4.1365e-04
Loss = 1.7196e-02, PNorm = 170.6894, GNorm = 0.3293, lr_0 = 4.1337e-04
Loss = 3.6595e-02, PNorm = 170.7129, GNorm = 0.4376, lr_0 = 4.1309e-04
Loss = 2.1876e-02, PNorm = 170.7372, GNorm = 0.6305, lr_0 = 4.1280e-04
Loss = 1.9982e-02, PNorm = 170.7630, GNorm = 0.4255, lr_0 = 4.1252e-04
Loss = 1.6692e-02, PNorm = 170.7853, GNorm = 0.2131, lr_0 = 4.1224e-04
Loss = 1.8486e-02, PNorm = 170.8042, GNorm = 0.2261, lr_0 = 4.1196e-04
Loss = 1.6282e-02, PNorm = 170.8258, GNorm = 0.2514, lr_0 = 4.1167e-04
Loss = 1.6659e-02, PNorm = 170.8534, GNorm = 0.3810, lr_0 = 4.1139e-04
Loss = 1.4932e-02, PNorm = 170.8793, GNorm = 0.5202, lr_0 = 4.1111e-04
Loss = 1.9724e-02, PNorm = 170.9039, GNorm = 0.1988, lr_0 = 4.1083e-04
Loss = 1.6585e-02, PNorm = 170.9250, GNorm = 0.3702, lr_0 = 4.1055e-04
Loss = 1.6936e-02, PNorm = 170.9469, GNorm = 0.1884, lr_0 = 4.1027e-04
Loss = 2.3058e-02, PNorm = 170.9733, GNorm = 0.2257, lr_0 = 4.0998e-04
Loss = 1.9620e-02, PNorm = 170.9986, GNorm = 0.2706, lr_0 = 4.0970e-04
Loss = 2.7352e-02, PNorm = 171.0206, GNorm = 0.1552, lr_0 = 4.0942e-04
Loss = 2.0318e-02, PNorm = 171.0426, GNorm = 0.3892, lr_0 = 4.0914e-04
Loss = 1.9501e-02, PNorm = 171.0654, GNorm = 0.3355, lr_0 = 4.0886e-04
Loss = 1.7016e-02, PNorm = 171.0900, GNorm = 0.2165, lr_0 = 4.0858e-04
Loss = 1.4444e-02, PNorm = 171.1154, GNorm = 0.3151, lr_0 = 4.0830e-04
Loss = 2.0453e-02, PNorm = 171.1400, GNorm = 0.1780, lr_0 = 4.0802e-04
Loss = 2.0020e-02, PNorm = 171.1652, GNorm = 0.2355, lr_0 = 4.0774e-04
Loss = 1.9810e-02, PNorm = 171.1892, GNorm = 0.6324, lr_0 = 4.0746e-04
Loss = 2.2265e-02, PNorm = 171.2114, GNorm = 0.3054, lr_0 = 4.0718e-04
Loss = 1.9507e-02, PNorm = 171.2363, GNorm = 0.3484, lr_0 = 4.0691e-04
Loss = 1.9333e-02, PNorm = 171.2623, GNorm = 0.6029, lr_0 = 4.0663e-04
Loss = 2.2274e-02, PNorm = 171.2836, GNorm = 0.4545, lr_0 = 4.0635e-04
Loss = 1.7909e-02, PNorm = 171.3049, GNorm = 0.3262, lr_0 = 4.0607e-04
Loss = 1.5982e-02, PNorm = 171.3286, GNorm = 0.2305, lr_0 = 4.0579e-04
Loss = 1.5387e-02, PNorm = 171.3501, GNorm = 0.2511, lr_0 = 4.0551e-04
Loss = 1.6764e-02, PNorm = 171.3725, GNorm = 0.2159, lr_0 = 4.0524e-04
Loss = 1.7047e-02, PNorm = 171.3948, GNorm = 0.1552, lr_0 = 4.0496e-04
Loss = 2.2624e-02, PNorm = 171.4162, GNorm = 0.1858, lr_0 = 4.0468e-04
Validation mae = 0.121624
Epoch 13
Loss = 2.3777e-02, PNorm = 171.4351, GNorm = 0.2317, lr_0 = 4.0440e-04
Loss = 1.5922e-02, PNorm = 171.4567, GNorm = 0.2354, lr_0 = 4.0413e-04
Loss = 1.7217e-02, PNorm = 171.4763, GNorm = 0.3101, lr_0 = 4.0385e-04
Loss = 2.5192e-02, PNorm = 171.4963, GNorm = 0.5711, lr_0 = 4.0357e-04
Loss = 1.5907e-02, PNorm = 171.5159, GNorm = 0.4696, lr_0 = 4.0330e-04
Loss = 1.6857e-02, PNorm = 171.5357, GNorm = 0.3398, lr_0 = 4.0302e-04
Loss = 1.7775e-02, PNorm = 171.5527, GNorm = 0.5614, lr_0 = 4.0274e-04
Loss = 1.6012e-02, PNorm = 171.5673, GNorm = 0.3203, lr_0 = 4.0247e-04
Loss = 1.5813e-02, PNorm = 171.5898, GNorm = 0.4644, lr_0 = 4.0219e-04
Loss = 1.6524e-02, PNorm = 171.6094, GNorm = 0.2912, lr_0 = 4.0192e-04
Loss = 1.5607e-02, PNorm = 171.6295, GNorm = 0.4449, lr_0 = 4.0164e-04
Loss = 1.2737e-02, PNorm = 171.6465, GNorm = 0.2258, lr_0 = 4.0137e-04
Loss = 1.8258e-02, PNorm = 171.6619, GNorm = 0.1330, lr_0 = 4.0109e-04
Loss = 1.6413e-02, PNorm = 171.6770, GNorm = 0.5822, lr_0 = 4.0082e-04
Loss = 1.3368e-02, PNorm = 171.6947, GNorm = 0.3441, lr_0 = 4.0054e-04
Loss = 1.5332e-02, PNorm = 171.7089, GNorm = 0.5326, lr_0 = 4.0027e-04
Loss = 1.9412e-02, PNorm = 171.7286, GNorm = 0.4948, lr_0 = 3.9999e-04
Loss = 1.5493e-02, PNorm = 171.7472, GNorm = 0.2846, lr_0 = 3.9972e-04
Loss = 1.5476e-02, PNorm = 171.7660, GNorm = 0.4557, lr_0 = 3.9945e-04
Loss = 1.9757e-02, PNorm = 171.7857, GNorm = 0.2892, lr_0 = 3.9917e-04
Loss = 1.8186e-02, PNorm = 171.8032, GNorm = 0.5434, lr_0 = 3.9890e-04
Loss = 1.8248e-02, PNorm = 171.8196, GNorm = 0.3333, lr_0 = 3.9863e-04
Loss = 1.4572e-02, PNorm = 171.8347, GNorm = 0.2043, lr_0 = 3.9835e-04
Loss = 2.6419e-02, PNorm = 171.8493, GNorm = 0.2802, lr_0 = 3.9808e-04
Loss = 1.4031e-02, PNorm = 171.8639, GNorm = 0.1846, lr_0 = 3.9781e-04
Loss = 1.3031e-02, PNorm = 171.8772, GNorm = 0.3359, lr_0 = 3.9753e-04
Loss = 1.1848e-02, PNorm = 171.8925, GNorm = 0.1993, lr_0 = 3.9726e-04
Loss = 1.2951e-02, PNorm = 171.9107, GNorm = 0.1538, lr_0 = 3.9699e-04
Loss = 1.4779e-02, PNorm = 171.9275, GNorm = 0.2631, lr_0 = 3.9672e-04
Loss = 1.4968e-02, PNorm = 171.9465, GNorm = 0.3187, lr_0 = 3.9645e-04
Loss = 1.7664e-02, PNorm = 171.9643, GNorm = 0.2472, lr_0 = 3.9617e-04
Loss = 1.7666e-02, PNorm = 171.9817, GNorm = 0.1468, lr_0 = 3.9590e-04
Loss = 1.4926e-02, PNorm = 172.0002, GNorm = 0.3826, lr_0 = 3.9563e-04
Loss = 1.1020e-02, PNorm = 172.0186, GNorm = 0.3060, lr_0 = 3.9536e-04
Loss = 2.5901e-02, PNorm = 172.0395, GNorm = 0.2919, lr_0 = 3.9509e-04
Loss = 1.3314e-02, PNorm = 172.0596, GNorm = 0.3575, lr_0 = 3.9482e-04
Loss = 1.1513e-02, PNorm = 172.0808, GNorm = 0.1904, lr_0 = 3.9455e-04
Loss = 1.7433e-02, PNorm = 172.0998, GNorm = 0.6516, lr_0 = 3.9428e-04
Loss = 1.4535e-02, PNorm = 172.1165, GNorm = 0.1717, lr_0 = 3.9401e-04
Loss = 2.1719e-02, PNorm = 172.1356, GNorm = 0.2835, lr_0 = 3.9374e-04
Loss = 1.6543e-02, PNorm = 172.1543, GNorm = 0.2494, lr_0 = 3.9347e-04
Loss = 1.9551e-02, PNorm = 172.1700, GNorm = 1.0975, lr_0 = 3.9320e-04
Loss = 1.5661e-02, PNorm = 172.1854, GNorm = 0.2369, lr_0 = 3.9293e-04
Loss = 1.4227e-02, PNorm = 172.2023, GNorm = 0.1637, lr_0 = 3.9266e-04
Loss = 1.4455e-02, PNorm = 172.2176, GNorm = 0.2345, lr_0 = 3.9239e-04
Loss = 1.6282e-02, PNorm = 172.2351, GNorm = 0.2305, lr_0 = 3.9212e-04
Loss = 1.4700e-02, PNorm = 172.2514, GNorm = 0.1855, lr_0 = 3.9185e-04
Loss = 1.4520e-02, PNorm = 172.2708, GNorm = 0.3052, lr_0 = 3.9159e-04
Loss = 1.5572e-02, PNorm = 172.2888, GNorm = 0.7348, lr_0 = 3.9132e-04
Loss = 1.8349e-02, PNorm = 172.3045, GNorm = 0.3252, lr_0 = 3.9105e-04
Loss = 1.8894e-02, PNorm = 172.3258, GNorm = 0.2168, lr_0 = 3.9078e-04
Loss = 1.2131e-02, PNorm = 172.3479, GNorm = 0.1495, lr_0 = 3.9051e-04
Loss = 1.5063e-02, PNorm = 172.3661, GNorm = 0.2132, lr_0 = 3.9025e-04
Loss = 1.7962e-02, PNorm = 172.3855, GNorm = 0.5088, lr_0 = 3.8998e-04
Loss = 1.5217e-02, PNorm = 172.4053, GNorm = 0.3369, lr_0 = 3.8971e-04
Loss = 1.7435e-02, PNorm = 172.4250, GNorm = 0.2719, lr_0 = 3.8945e-04
Loss = 1.8878e-02, PNorm = 172.4423, GNorm = 0.3962, lr_0 = 3.8918e-04
Loss = 1.8686e-02, PNorm = 172.4612, GNorm = 0.3460, lr_0 = 3.8891e-04
Loss = 1.6502e-02, PNorm = 172.4804, GNorm = 0.2061, lr_0 = 3.8865e-04
Loss = 1.4090e-02, PNorm = 172.4973, GNorm = 0.1464, lr_0 = 3.8838e-04
Loss = 1.2892e-02, PNorm = 172.5133, GNorm = 0.4707, lr_0 = 3.8811e-04
Loss = 1.3432e-02, PNorm = 172.5298, GNorm = 0.1455, lr_0 = 3.8785e-04
Loss = 1.7765e-02, PNorm = 172.5460, GNorm = 0.4617, lr_0 = 3.8758e-04
Loss = 2.0338e-02, PNorm = 172.5688, GNorm = 0.7279, lr_0 = 3.8732e-04
Loss = 1.3817e-02, PNorm = 172.5842, GNorm = 0.2497, lr_0 = 3.8705e-04
Loss = 2.3017e-02, PNorm = 172.6037, GNorm = 0.1715, lr_0 = 3.8679e-04
Loss = 1.9007e-02, PNorm = 172.6193, GNorm = 0.3296, lr_0 = 3.8652e-04
Loss = 1.9699e-02, PNorm = 172.6383, GNorm = 0.1884, lr_0 = 3.8626e-04
Loss = 1.6680e-02, PNorm = 172.6639, GNorm = 0.2391, lr_0 = 3.8599e-04
Loss = 1.3394e-02, PNorm = 172.6882, GNorm = 0.2564, lr_0 = 3.8573e-04
Loss = 1.8330e-02, PNorm = 172.7020, GNorm = 0.6038, lr_0 = 3.8546e-04
Loss = 1.8386e-02, PNorm = 172.7193, GNorm = 0.3078, lr_0 = 3.8520e-04
Loss = 1.4711e-02, PNorm = 172.7388, GNorm = 0.2788, lr_0 = 3.8493e-04
Loss = 1.9445e-02, PNorm = 172.7584, GNorm = 0.3982, lr_0 = 3.8467e-04
Loss = 1.6390e-02, PNorm = 172.7771, GNorm = 0.5474, lr_0 = 3.8441e-04
Loss = 1.4836e-02, PNorm = 172.7954, GNorm = 0.3100, lr_0 = 3.8414e-04
Loss = 1.5700e-02, PNorm = 172.8131, GNorm = 0.1968, lr_0 = 3.8388e-04
Loss = 1.6013e-02, PNorm = 172.8301, GNorm = 0.1842, lr_0 = 3.8362e-04
Loss = 1.8876e-02, PNorm = 172.8498, GNorm = 0.5141, lr_0 = 3.8336e-04
Loss = 1.6905e-02, PNorm = 172.8715, GNorm = 0.5138, lr_0 = 3.8309e-04
Loss = 1.5119e-02, PNorm = 172.8917, GNorm = 0.3203, lr_0 = 3.8283e-04
Loss = 1.8835e-02, PNorm = 172.9106, GNorm = 0.2097, lr_0 = 3.8257e-04
Loss = 1.9634e-02, PNorm = 172.9291, GNorm = 0.2936, lr_0 = 3.8231e-04
Loss = 1.3773e-02, PNorm = 172.9496, GNorm = 0.4530, lr_0 = 3.8204e-04
Loss = 2.1809e-02, PNorm = 172.9660, GNorm = 0.2015, lr_0 = 3.8178e-04
Loss = 2.1621e-02, PNorm = 172.9881, GNorm = 0.3892, lr_0 = 3.8152e-04
Loss = 1.2854e-02, PNorm = 173.0123, GNorm = 0.1328, lr_0 = 3.8126e-04
Loss = 1.6748e-02, PNorm = 173.0332, GNorm = 0.4284, lr_0 = 3.8100e-04
Loss = 1.6226e-02, PNorm = 173.0524, GNorm = 0.2361, lr_0 = 3.8074e-04
Loss = 1.7595e-02, PNorm = 173.0706, GNorm = 0.4801, lr_0 = 3.8048e-04
Loss = 1.3766e-02, PNorm = 173.0871, GNorm = 0.4441, lr_0 = 3.8022e-04
Loss = 1.7609e-02, PNorm = 173.1062, GNorm = 0.2033, lr_0 = 3.7995e-04
Loss = 1.3351e-02, PNorm = 173.1250, GNorm = 0.2360, lr_0 = 3.7969e-04
Loss = 1.2535e-02, PNorm = 173.1468, GNorm = 0.2555, lr_0 = 3.7943e-04
Loss = 1.6659e-02, PNorm = 173.1655, GNorm = 0.2302, lr_0 = 3.7917e-04
Loss = 2.1748e-02, PNorm = 173.1835, GNorm = 0.3003, lr_0 = 3.7891e-04
Loss = 1.5822e-02, PNorm = 173.2038, GNorm = 0.2158, lr_0 = 3.7866e-04
Loss = 1.7781e-02, PNorm = 173.2256, GNorm = 0.4752, lr_0 = 3.7840e-04
Loss = 1.5083e-02, PNorm = 173.2472, GNorm = 0.4008, lr_0 = 3.7814e-04
Loss = 1.5657e-02, PNorm = 173.2696, GNorm = 0.5518, lr_0 = 3.7788e-04
Loss = 1.8897e-02, PNorm = 173.2905, GNorm = 0.4697, lr_0 = 3.7762e-04
Loss = 1.7692e-02, PNorm = 173.3102, GNorm = 0.3186, lr_0 = 3.7736e-04
Loss = 1.4591e-02, PNorm = 173.3315, GNorm = 0.2218, lr_0 = 3.7710e-04
Loss = 1.1519e-02, PNorm = 173.3530, GNorm = 0.1563, lr_0 = 3.7684e-04
Loss = 1.6816e-02, PNorm = 173.3739, GNorm = 0.4748, lr_0 = 3.7659e-04
Loss = 1.8635e-02, PNorm = 173.3950, GNorm = 0.3551, lr_0 = 3.7633e-04
Loss = 1.4981e-02, PNorm = 173.4128, GNorm = 0.1495, lr_0 = 3.7607e-04
Loss = 1.6514e-02, PNorm = 173.4299, GNorm = 0.2194, lr_0 = 3.7581e-04
Loss = 1.5946e-02, PNorm = 173.4477, GNorm = 0.3130, lr_0 = 3.7555e-04
Loss = 1.4770e-02, PNorm = 173.4683, GNorm = 0.3384, lr_0 = 3.7530e-04
Loss = 1.3437e-02, PNorm = 173.4907, GNorm = 0.3278, lr_0 = 3.7504e-04
Loss = 1.9139e-02, PNorm = 173.5151, GNorm = 0.2542, lr_0 = 3.7478e-04
Loss = 2.8932e-02, PNorm = 173.5354, GNorm = 0.1588, lr_0 = 3.7453e-04
Loss = 2.0449e-02, PNorm = 173.5557, GNorm = 0.2630, lr_0 = 3.7427e-04
Loss = 1.9240e-02, PNorm = 173.5741, GNorm = 0.4506, lr_0 = 3.7401e-04
Loss = 1.5846e-02, PNorm = 173.5973, GNorm = 0.2016, lr_0 = 3.7376e-04
Loss = 1.7078e-02, PNorm = 173.6184, GNorm = 0.2472, lr_0 = 3.7350e-04
Loss = 2.0699e-02, PNorm = 173.6434, GNorm = 0.6985, lr_0 = 3.7325e-04
Loss = 1.8821e-02, PNorm = 173.6681, GNorm = 0.2707, lr_0 = 3.7299e-04
Loss = 1.9441e-02, PNorm = 173.6913, GNorm = 0.4486, lr_0 = 3.7273e-04
Validation mae = 0.121401
Epoch 14
Loss = 1.6823e-02, PNorm = 173.7077, GNorm = 0.1227, lr_0 = 3.7248e-04
Loss = 1.6214e-02, PNorm = 173.7249, GNorm = 0.5618, lr_0 = 3.7222e-04
Loss = 1.4736e-02, PNorm = 173.7439, GNorm = 0.2701, lr_0 = 3.7197e-04
Loss = 1.6090e-02, PNorm = 173.7602, GNorm = 0.1831, lr_0 = 3.7171e-04
Loss = 1.6565e-02, PNorm = 173.7782, GNorm = 0.2199, lr_0 = 3.7146e-04
Loss = 1.1994e-02, PNorm = 173.7911, GNorm = 0.2135, lr_0 = 3.7120e-04
Loss = 1.8284e-02, PNorm = 173.8040, GNorm = 0.3602, lr_0 = 3.7095e-04
Loss = 1.3418e-02, PNorm = 173.8202, GNorm = 0.3619, lr_0 = 3.7070e-04
Loss = 1.3818e-02, PNorm = 173.8343, GNorm = 0.2154, lr_0 = 3.7044e-04
Loss = 2.0857e-02, PNorm = 173.8503, GNorm = 0.1402, lr_0 = 3.7019e-04
Loss = 1.7261e-02, PNorm = 173.8640, GNorm = 0.1677, lr_0 = 3.6993e-04
Loss = 1.6954e-02, PNorm = 173.8801, GNorm = 0.5221, lr_0 = 3.6968e-04
Loss = 1.4251e-02, PNorm = 173.8918, GNorm = 0.3914, lr_0 = 3.6943e-04
Loss = 1.3173e-02, PNorm = 173.9096, GNorm = 0.2003, lr_0 = 3.6917e-04
Loss = 1.2662e-02, PNorm = 173.9236, GNorm = 0.1291, lr_0 = 3.6892e-04
Loss = 1.5842e-02, PNorm = 173.9362, GNorm = 0.3788, lr_0 = 3.6867e-04
Loss = 1.3109e-02, PNorm = 173.9518, GNorm = 0.2090, lr_0 = 3.6842e-04
Loss = 2.9709e-02, PNorm = 173.9687, GNorm = 0.1945, lr_0 = 3.6816e-04
Loss = 1.3972e-02, PNorm = 173.9804, GNorm = 0.2719, lr_0 = 3.6791e-04
Loss = 1.2308e-02, PNorm = 173.9921, GNorm = 0.3100, lr_0 = 3.6766e-04
Loss = 1.4765e-02, PNorm = 174.0052, GNorm = 0.2854, lr_0 = 3.6741e-04
Loss = 1.4105e-02, PNorm = 174.0198, GNorm = 0.2225, lr_0 = 3.6716e-04
Loss = 1.4939e-02, PNorm = 174.0344, GNorm = 0.1612, lr_0 = 3.6690e-04
Loss = 1.4324e-02, PNorm = 174.0464, GNorm = 0.5489, lr_0 = 3.6665e-04
Loss = 1.3143e-02, PNorm = 174.0605, GNorm = 0.1690, lr_0 = 3.6640e-04
Loss = 1.1614e-02, PNorm = 174.0757, GNorm = 0.1834, lr_0 = 3.6615e-04
Loss = 1.5258e-02, PNorm = 174.0885, GNorm = 0.1119, lr_0 = 3.6590e-04
Loss = 1.3043e-02, PNorm = 174.1048, GNorm = 0.1478, lr_0 = 3.6565e-04
Loss = 1.3891e-02, PNorm = 174.1244, GNorm = 0.3758, lr_0 = 3.6540e-04
Loss = 1.7728e-02, PNorm = 174.1392, GNorm = 0.2416, lr_0 = 3.6515e-04
Loss = 1.0368e-02, PNorm = 174.1517, GNorm = 0.1644, lr_0 = 3.6490e-04
Loss = 1.5815e-02, PNorm = 174.1625, GNorm = 0.1661, lr_0 = 3.6465e-04
Loss = 1.3986e-02, PNorm = 174.1733, GNorm = 0.2328, lr_0 = 3.6440e-04
Loss = 1.1475e-02, PNorm = 174.1863, GNorm = 0.3462, lr_0 = 3.6415e-04
Loss = 1.3079e-02, PNorm = 174.2015, GNorm = 0.3030, lr_0 = 3.6390e-04
Loss = 1.2671e-02, PNorm = 174.2171, GNorm = 0.1761, lr_0 = 3.6365e-04
Loss = 1.3225e-02, PNorm = 174.2354, GNorm = 0.2484, lr_0 = 3.6340e-04
Loss = 1.0562e-02, PNorm = 174.2531, GNorm = 0.2128, lr_0 = 3.6315e-04
Loss = 1.1687e-02, PNorm = 174.2699, GNorm = 0.2711, lr_0 = 3.6290e-04
Loss = 1.5975e-02, PNorm = 174.2847, GNorm = 0.3789, lr_0 = 3.6266e-04
Loss = 1.7294e-02, PNorm = 174.3015, GNorm = 0.2611, lr_0 = 3.6241e-04
Loss = 1.3439e-02, PNorm = 174.3163, GNorm = 0.2905, lr_0 = 3.6216e-04
Loss = 1.2532e-02, PNorm = 174.3323, GNorm = 0.2327, lr_0 = 3.6191e-04
Loss = 1.5076e-02, PNorm = 174.3459, GNorm = 0.5005, lr_0 = 3.6166e-04
Loss = 1.5587e-02, PNorm = 174.3568, GNorm = 0.3827, lr_0 = 3.6141e-04
Loss = 1.3204e-02, PNorm = 174.3720, GNorm = 0.2256, lr_0 = 3.6117e-04
Loss = 1.3306e-02, PNorm = 174.3891, GNorm = 0.2811, lr_0 = 3.6092e-04
Loss = 1.4642e-02, PNorm = 174.4040, GNorm = 0.2147, lr_0 = 3.6067e-04
Loss = 1.8874e-02, PNorm = 174.4208, GNorm = 0.4010, lr_0 = 3.6043e-04
Loss = 1.3859e-02, PNorm = 174.4350, GNorm = 0.1837, lr_0 = 3.6018e-04
Loss = 1.2754e-02, PNorm = 174.4493, GNorm = 0.2856, lr_0 = 3.5993e-04
Loss = 1.1460e-02, PNorm = 174.4645, GNorm = 0.1578, lr_0 = 3.5969e-04
Loss = 1.3717e-02, PNorm = 174.4761, GNorm = 0.2025, lr_0 = 3.5944e-04
Loss = 1.0905e-02, PNorm = 174.4893, GNorm = 0.2301, lr_0 = 3.5919e-04
Loss = 1.1579e-02, PNorm = 174.5036, GNorm = 0.2932, lr_0 = 3.5895e-04
Loss = 1.4050e-02, PNorm = 174.5186, GNorm = 0.2045, lr_0 = 3.5870e-04
Loss = 1.2516e-02, PNorm = 174.5331, GNorm = 0.3086, lr_0 = 3.5845e-04
Loss = 1.1675e-02, PNorm = 174.5466, GNorm = 0.2093, lr_0 = 3.5821e-04
Loss = 1.5727e-02, PNorm = 174.5595, GNorm = 0.3117, lr_0 = 3.5796e-04
Loss = 1.1919e-02, PNorm = 174.5717, GNorm = 0.2710, lr_0 = 3.5772e-04
Loss = 1.1748e-02, PNorm = 174.5851, GNorm = 0.2866, lr_0 = 3.5747e-04
Loss = 1.2589e-02, PNorm = 174.5983, GNorm = 0.8300, lr_0 = 3.5723e-04
Loss = 1.1503e-02, PNorm = 174.6111, GNorm = 0.2564, lr_0 = 3.5698e-04
Loss = 1.2012e-02, PNorm = 174.6258, GNorm = 0.1873, lr_0 = 3.5674e-04
Loss = 1.5556e-02, PNorm = 174.6412, GNorm = 0.3294, lr_0 = 3.5650e-04
Loss = 1.1581e-02, PNorm = 174.6562, GNorm = 0.4046, lr_0 = 3.5625e-04
Loss = 1.0073e-02, PNorm = 174.6725, GNorm = 0.1661, lr_0 = 3.5601e-04
Loss = 1.6130e-02, PNorm = 174.6908, GNorm = 0.5077, lr_0 = 3.5576e-04
Loss = 1.7083e-02, PNorm = 174.7102, GNorm = 0.3077, lr_0 = 3.5552e-04
Loss = 9.7986e-03, PNorm = 174.7292, GNorm = 0.2560, lr_0 = 3.5528e-04
Loss = 1.0766e-02, PNorm = 174.7464, GNorm = 0.1483, lr_0 = 3.5503e-04
Loss = 1.0897e-02, PNorm = 174.7618, GNorm = 0.1652, lr_0 = 3.5479e-04
Loss = 1.6578e-02, PNorm = 174.7783, GNorm = 0.3753, lr_0 = 3.5455e-04
Loss = 1.2874e-02, PNorm = 174.7962, GNorm = 0.2185, lr_0 = 3.5430e-04
Loss = 1.4960e-02, PNorm = 174.8136, GNorm = 0.2627, lr_0 = 3.5406e-04
Loss = 1.1672e-02, PNorm = 174.8299, GNorm = 0.2607, lr_0 = 3.5382e-04
Loss = 1.7070e-02, PNorm = 174.8442, GNorm = 0.1464, lr_0 = 3.5358e-04
Loss = 1.3096e-02, PNorm = 174.8591, GNorm = 0.3448, lr_0 = 3.5333e-04
Loss = 1.5687e-02, PNorm = 174.8707, GNorm = 0.3085, lr_0 = 3.5309e-04
Loss = 1.2769e-02, PNorm = 174.8861, GNorm = 0.4191, lr_0 = 3.5285e-04
Loss = 1.3048e-02, PNorm = 174.9020, GNorm = 0.5392, lr_0 = 3.5261e-04
Loss = 1.1598e-02, PNorm = 174.9167, GNorm = 0.2800, lr_0 = 3.5237e-04
Loss = 1.3035e-02, PNorm = 174.9361, GNorm = 0.4890, lr_0 = 3.5212e-04
Loss = 1.7753e-02, PNorm = 174.9545, GNorm = 0.2070, lr_0 = 3.5188e-04
Loss = 1.3607e-02, PNorm = 174.9660, GNorm = 0.2000, lr_0 = 3.5164e-04
Loss = 1.0411e-02, PNorm = 174.9811, GNorm = 0.1799, lr_0 = 3.5140e-04
Loss = 1.1532e-02, PNorm = 174.9931, GNorm = 0.2668, lr_0 = 3.5116e-04
Loss = 1.1131e-02, PNorm = 175.0090, GNorm = 0.2958, lr_0 = 3.5092e-04
Loss = 1.0717e-02, PNorm = 175.0254, GNorm = 0.3893, lr_0 = 3.5068e-04
Loss = 1.5740e-02, PNorm = 175.0411, GNorm = 1.2628, lr_0 = 3.5044e-04
Loss = 1.4929e-02, PNorm = 175.0589, GNorm = 0.3217, lr_0 = 3.5020e-04
Loss = 1.5343e-02, PNorm = 175.0745, GNorm = 0.2136, lr_0 = 3.4996e-04
Loss = 1.3863e-02, PNorm = 175.0918, GNorm = 0.2998, lr_0 = 3.4972e-04
Loss = 2.4476e-02, PNorm = 175.1120, GNorm = 0.7894, lr_0 = 3.4948e-04
Loss = 1.2230e-02, PNorm = 175.1276, GNorm = 0.2936, lr_0 = 3.4924e-04
Loss = 1.4595e-02, PNorm = 175.1468, GNorm = 0.2074, lr_0 = 3.4900e-04
Loss = 1.5454e-02, PNorm = 175.1645, GNorm = 0.1208, lr_0 = 3.4876e-04
Loss = 1.3028e-02, PNorm = 175.1833, GNorm = 0.1658, lr_0 = 3.4852e-04
Loss = 1.1412e-02, PNorm = 175.2001, GNorm = 0.2490, lr_0 = 3.4828e-04
Loss = 1.6625e-02, PNorm = 175.2143, GNorm = 0.1515, lr_0 = 3.4805e-04
Loss = 1.3960e-02, PNorm = 175.2283, GNorm = 0.2401, lr_0 = 3.4781e-04
Loss = 1.6883e-02, PNorm = 175.2424, GNorm = 0.2696, lr_0 = 3.4757e-04
Loss = 1.7001e-02, PNorm = 175.2578, GNorm = 0.1542, lr_0 = 3.4733e-04
Loss = 1.5909e-02, PNorm = 175.2731, GNorm = 0.0941, lr_0 = 3.4709e-04
Loss = 1.3667e-02, PNorm = 175.2889, GNorm = 0.3188, lr_0 = 3.4686e-04
Loss = 1.2641e-02, PNorm = 175.3038, GNorm = 0.3897, lr_0 = 3.4662e-04
Loss = 1.2989e-02, PNorm = 175.3216, GNorm = 0.2239, lr_0 = 3.4638e-04
Loss = 1.8937e-02, PNorm = 175.3396, GNorm = 0.1439, lr_0 = 3.4614e-04
Loss = 1.5781e-02, PNorm = 175.3553, GNorm = 0.5034, lr_0 = 3.4591e-04
Loss = 1.3594e-02, PNorm = 175.3687, GNorm = 0.3624, lr_0 = 3.4567e-04
Loss = 1.2888e-02, PNorm = 175.3836, GNorm = 0.2251, lr_0 = 3.4543e-04
Loss = 1.0550e-02, PNorm = 175.4003, GNorm = 0.1560, lr_0 = 3.4520e-04
Loss = 1.3318e-02, PNorm = 175.4146, GNorm = 0.3504, lr_0 = 3.4496e-04
Loss = 1.5231e-02, PNorm = 175.4316, GNorm = 0.2839, lr_0 = 3.4472e-04
Loss = 1.3235e-02, PNorm = 175.4490, GNorm = 0.1675, lr_0 = 3.4449e-04
Loss = 1.6141e-02, PNorm = 175.4718, GNorm = 0.2383, lr_0 = 3.4425e-04
Loss = 1.3934e-02, PNorm = 175.4934, GNorm = 0.1626, lr_0 = 3.4402e-04
Loss = 1.7774e-02, PNorm = 175.5144, GNorm = 0.6395, lr_0 = 3.4378e-04
Loss = 1.9453e-02, PNorm = 175.5344, GNorm = 0.6724, lr_0 = 3.4354e-04
Loss = 1.3177e-02, PNorm = 175.5501, GNorm = 0.4008, lr_0 = 3.4331e-04
Validation mae = 0.121299
Epoch 15
Loss = 1.1143e-02, PNorm = 175.5657, GNorm = 0.1524, lr_0 = 3.4307e-04
Loss = 1.1315e-02, PNorm = 175.5753, GNorm = 0.2305, lr_0 = 3.4284e-04
Loss = 1.0525e-02, PNorm = 175.5857, GNorm = 0.1722, lr_0 = 3.4260e-04
Loss = 2.4712e-02, PNorm = 175.5981, GNorm = 0.6011, lr_0 = 3.4237e-04
Loss = 1.1726e-02, PNorm = 175.6074, GNorm = 0.2461, lr_0 = 3.4213e-04
Loss = 1.1397e-02, PNorm = 175.6179, GNorm = 0.1808, lr_0 = 3.4190e-04
Loss = 1.1123e-02, PNorm = 175.6312, GNorm = 0.1889, lr_0 = 3.4167e-04
Loss = 9.8958e-03, PNorm = 175.6437, GNorm = 0.1904, lr_0 = 3.4143e-04
Loss = 1.0770e-02, PNorm = 175.6567, GNorm = 0.5149, lr_0 = 3.4120e-04
Loss = 1.2697e-02, PNorm = 175.6686, GNorm = 0.1981, lr_0 = 3.4096e-04
Loss = 1.4393e-02, PNorm = 175.6811, GNorm = 0.2858, lr_0 = 3.4073e-04
Loss = 1.3826e-02, PNorm = 175.6953, GNorm = 0.5100, lr_0 = 3.4050e-04
Loss = 1.3787e-02, PNorm = 175.7064, GNorm = 0.1380, lr_0 = 3.4026e-04
Loss = 1.5861e-02, PNorm = 175.7191, GNorm = 0.1961, lr_0 = 3.4003e-04
Loss = 1.4546e-02, PNorm = 175.7286, GNorm = 0.1836, lr_0 = 3.3980e-04
Loss = 1.4024e-02, PNorm = 175.7426, GNorm = 0.2017, lr_0 = 3.3956e-04
Loss = 1.0274e-02, PNorm = 175.7563, GNorm = 0.1822, lr_0 = 3.3933e-04
Loss = 1.1887e-02, PNorm = 175.7668, GNorm = 0.1149, lr_0 = 3.3910e-04
Loss = 1.0795e-02, PNorm = 175.7777, GNorm = 0.1526, lr_0 = 3.3887e-04
Loss = 1.2617e-02, PNorm = 175.7902, GNorm = 0.2869, lr_0 = 3.3864e-04
Loss = 1.0453e-02, PNorm = 175.8026, GNorm = 0.1768, lr_0 = 3.3840e-04
Loss = 1.2972e-02, PNorm = 175.8147, GNorm = 0.4418, lr_0 = 3.3817e-04
Loss = 9.3898e-03, PNorm = 175.8251, GNorm = 0.1634, lr_0 = 3.3794e-04
Loss = 9.6680e-03, PNorm = 175.8361, GNorm = 0.2499, lr_0 = 3.3771e-04
Loss = 9.5185e-03, PNorm = 175.8473, GNorm = 0.2777, lr_0 = 3.3748e-04
Loss = 2.5070e-02, PNorm = 175.8607, GNorm = 0.4552, lr_0 = 3.3725e-04
Loss = 1.2748e-02, PNorm = 175.8705, GNorm = 0.5239, lr_0 = 3.3701e-04
Loss = 1.3705e-02, PNorm = 175.8846, GNorm = 0.1728, lr_0 = 3.3678e-04
Loss = 9.2243e-03, PNorm = 175.8964, GNorm = 0.1246, lr_0 = 3.3655e-04
Loss = 1.0889e-02, PNorm = 175.9078, GNorm = 0.4228, lr_0 = 3.3632e-04
Loss = 1.0619e-02, PNorm = 175.9195, GNorm = 0.1237, lr_0 = 3.3609e-04
Loss = 1.3288e-02, PNorm = 175.9300, GNorm = 0.2150, lr_0 = 3.3586e-04
Loss = 9.4100e-03, PNorm = 175.9419, GNorm = 0.1752, lr_0 = 3.3563e-04
Loss = 1.3603e-02, PNorm = 175.9591, GNorm = 0.2390, lr_0 = 3.3540e-04
Loss = 9.2821e-03, PNorm = 175.9712, GNorm = 0.3502, lr_0 = 3.3517e-04
Loss = 1.2355e-02, PNorm = 175.9838, GNorm = 0.3702, lr_0 = 3.3494e-04
Loss = 9.0758e-03, PNorm = 175.9977, GNorm = 0.2755, lr_0 = 3.3471e-04
Loss = 1.3600e-02, PNorm = 176.0119, GNorm = 0.1849, lr_0 = 3.3448e-04
Loss = 1.3111e-02, PNorm = 176.0250, GNorm = 0.3452, lr_0 = 3.3425e-04
Loss = 1.5345e-02, PNorm = 176.0376, GNorm = 0.4869, lr_0 = 3.3403e-04
Loss = 9.8943e-03, PNorm = 176.0500, GNorm = 0.2840, lr_0 = 3.3380e-04
Loss = 1.5192e-02, PNorm = 176.0641, GNorm = 0.1983, lr_0 = 3.3357e-04
Loss = 1.0511e-02, PNorm = 176.0749, GNorm = 0.1232, lr_0 = 3.3334e-04
Loss = 1.5786e-02, PNorm = 176.0874, GNorm = 0.7843, lr_0 = 3.3311e-04
Loss = 1.2847e-02, PNorm = 176.1013, GNorm = 0.3035, lr_0 = 3.3288e-04
Loss = 1.0095e-02, PNorm = 176.1134, GNorm = 0.1818, lr_0 = 3.3265e-04
Loss = 1.1267e-02, PNorm = 176.1277, GNorm = 0.2515, lr_0 = 3.3243e-04
Loss = 1.5387e-02, PNorm = 176.1400, GNorm = 0.5164, lr_0 = 3.3220e-04
Loss = 1.9466e-02, PNorm = 176.1548, GNorm = 0.5732, lr_0 = 3.3197e-04
Loss = 1.4403e-02, PNorm = 176.1678, GNorm = 0.4170, lr_0 = 3.3174e-04
Loss = 1.1445e-02, PNorm = 176.1810, GNorm = 0.2861, lr_0 = 3.3152e-04
Loss = 1.2768e-02, PNorm = 176.1913, GNorm = 0.3513, lr_0 = 3.3129e-04
Loss = 8.6103e-03, PNorm = 176.2020, GNorm = 0.3130, lr_0 = 3.3106e-04
Loss = 1.3713e-02, PNorm = 176.2165, GNorm = 0.4197, lr_0 = 3.3084e-04
Loss = 1.4273e-02, PNorm = 176.2282, GNorm = 0.2627, lr_0 = 3.3061e-04
Loss = 1.2875e-02, PNorm = 176.2397, GNorm = 0.1846, lr_0 = 3.3038e-04
Loss = 1.0612e-02, PNorm = 176.2538, GNorm = 0.2177, lr_0 = 3.3016e-04
Loss = 1.2984e-02, PNorm = 176.2727, GNorm = 0.2235, lr_0 = 3.2993e-04
Loss = 1.1126e-02, PNorm = 176.2911, GNorm = 0.5051, lr_0 = 3.2970e-04
Loss = 9.8376e-03, PNorm = 176.3071, GNorm = 0.1389, lr_0 = 3.2948e-04
Loss = 9.2934e-03, PNorm = 176.3214, GNorm = 0.2069, lr_0 = 3.2925e-04
Loss = 1.0225e-02, PNorm = 176.3340, GNorm = 0.1667, lr_0 = 3.2903e-04
Loss = 1.2019e-02, PNorm = 176.3473, GNorm = 0.1875, lr_0 = 3.2880e-04
Loss = 8.9592e-03, PNorm = 176.3587, GNorm = 0.2196, lr_0 = 3.2858e-04
Loss = 1.8368e-02, PNorm = 176.3698, GNorm = 0.2804, lr_0 = 3.2835e-04
Loss = 9.5693e-03, PNorm = 176.3866, GNorm = 0.1976, lr_0 = 3.2813e-04
Loss = 1.2597e-02, PNorm = 176.4005, GNorm = 0.2193, lr_0 = 3.2790e-04
Loss = 1.2253e-02, PNorm = 176.4156, GNorm = 0.2630, lr_0 = 3.2768e-04
Loss = 1.7038e-02, PNorm = 176.4297, GNorm = 0.1829, lr_0 = 3.2745e-04
Loss = 1.1382e-02, PNorm = 176.4451, GNorm = 0.1411, lr_0 = 3.2723e-04
Loss = 1.0683e-02, PNorm = 176.4597, GNorm = 0.2262, lr_0 = 3.2700e-04
Loss = 1.3295e-02, PNorm = 176.4717, GNorm = 0.2443, lr_0 = 3.2678e-04
Loss = 1.0472e-02, PNorm = 176.4860, GNorm = 0.4087, lr_0 = 3.2656e-04
Loss = 1.2070e-02, PNorm = 176.5009, GNorm = 0.1681, lr_0 = 3.2633e-04
Loss = 1.1989e-02, PNorm = 176.5170, GNorm = 0.2808, lr_0 = 3.2611e-04
Loss = 1.2240e-02, PNorm = 176.5329, GNorm = 0.2419, lr_0 = 3.2589e-04
Loss = 1.6448e-02, PNorm = 176.5476, GNorm = 0.2485, lr_0 = 3.2566e-04
Loss = 1.3543e-02, PNorm = 176.5601, GNorm = 0.2544, lr_0 = 3.2544e-04
Loss = 8.2280e-03, PNorm = 176.5729, GNorm = 0.1947, lr_0 = 3.2522e-04
Loss = 1.6253e-02, PNorm = 176.5857, GNorm = 0.9829, lr_0 = 3.2499e-04
Loss = 7.2927e-03, PNorm = 176.5985, GNorm = 0.3174, lr_0 = 3.2477e-04
Loss = 9.1715e-03, PNorm = 176.6129, GNorm = 0.1380, lr_0 = 3.2455e-04
Loss = 1.3012e-02, PNorm = 176.6237, GNorm = 0.2022, lr_0 = 3.2433e-04
Loss = 1.1164e-02, PNorm = 176.6357, GNorm = 0.2020, lr_0 = 3.2410e-04
Loss = 1.1347e-02, PNorm = 176.6493, GNorm = 0.1379, lr_0 = 3.2388e-04
Loss = 9.4090e-03, PNorm = 176.6647, GNorm = 0.2163, lr_0 = 3.2366e-04
Loss = 1.1606e-02, PNorm = 176.6816, GNorm = 0.1839, lr_0 = 3.2344e-04
Loss = 1.3014e-02, PNorm = 176.6991, GNorm = 0.8686, lr_0 = 3.2322e-04
Loss = 8.1923e-03, PNorm = 176.7124, GNorm = 0.2511, lr_0 = 3.2300e-04
Loss = 2.1340e-02, PNorm = 176.7199, GNorm = 0.3640, lr_0 = 3.2277e-04
Loss = 1.5966e-02, PNorm = 176.7350, GNorm = 0.8300, lr_0 = 3.2255e-04
Loss = 1.2018e-02, PNorm = 176.7499, GNorm = 0.1866, lr_0 = 3.2233e-04
Loss = 9.5875e-03, PNorm = 176.7664, GNorm = 0.2830, lr_0 = 3.2211e-04
Loss = 9.0015e-03, PNorm = 176.7794, GNorm = 0.1503, lr_0 = 3.2189e-04
Loss = 1.3353e-02, PNorm = 176.7919, GNorm = 0.8522, lr_0 = 3.2167e-04
Loss = 1.1788e-02, PNorm = 176.8053, GNorm = 0.2866, lr_0 = 3.2145e-04
Loss = 1.2682e-02, PNorm = 176.8190, GNorm = 0.2418, lr_0 = 3.2123e-04
Loss = 1.2179e-02, PNorm = 176.8292, GNorm = 0.3367, lr_0 = 3.2101e-04
Loss = 1.0849e-02, PNorm = 176.8408, GNorm = 0.2114, lr_0 = 3.2079e-04
Loss = 9.2485e-03, PNorm = 176.8560, GNorm = 0.3715, lr_0 = 3.2057e-04
Loss = 1.0358e-02, PNorm = 176.8679, GNorm = 0.1996, lr_0 = 3.2035e-04
Loss = 1.4794e-02, PNorm = 176.8840, GNorm = 0.4810, lr_0 = 3.2013e-04
Loss = 1.4341e-02, PNorm = 176.8988, GNorm = 0.2395, lr_0 = 3.1991e-04
Loss = 8.6686e-03, PNorm = 176.9118, GNorm = 0.1140, lr_0 = 3.1969e-04
Loss = 1.9553e-02, PNorm = 176.9248, GNorm = 0.4048, lr_0 = 3.1947e-04
Loss = 9.8685e-03, PNorm = 176.9390, GNorm = 0.1490, lr_0 = 3.1925e-04
Loss = 1.1933e-02, PNorm = 176.9537, GNorm = 0.2576, lr_0 = 3.1904e-04
Loss = 1.5427e-02, PNorm = 176.9625, GNorm = 0.3017, lr_0 = 3.1882e-04
Loss = 1.5609e-02, PNorm = 176.9748, GNorm = 0.2159, lr_0 = 3.1860e-04
Loss = 1.2342e-02, PNorm = 176.9881, GNorm = 0.1855, lr_0 = 3.1838e-04
Loss = 9.7157e-03, PNorm = 177.0032, GNorm = 0.3787, lr_0 = 3.1816e-04
Loss = 1.0978e-02, PNorm = 177.0174, GNorm = 0.2564, lr_0 = 3.1794e-04
Loss = 1.7325e-02, PNorm = 177.0332, GNorm = 0.2280, lr_0 = 3.1773e-04
Loss = 1.8589e-02, PNorm = 177.0477, GNorm = 0.5823, lr_0 = 3.1751e-04
Loss = 1.1110e-02, PNorm = 177.0630, GNorm = 0.1664, lr_0 = 3.1729e-04
Loss = 1.0119e-02, PNorm = 177.0773, GNorm = 0.3336, lr_0 = 3.1707e-04
Loss = 1.1666e-02, PNorm = 177.0946, GNorm = 0.3299, lr_0 = 3.1686e-04
Loss = 1.0699e-02, PNorm = 177.1095, GNorm = 0.1906, lr_0 = 3.1664e-04
Loss = 1.0202e-02, PNorm = 177.1230, GNorm = 0.3214, lr_0 = 3.1642e-04
Loss = 9.1446e-03, PNorm = 177.1356, GNorm = 0.2193, lr_0 = 3.1621e-04
Validation mae = 0.121353
Epoch 16
Loss = 7.9384e-03, PNorm = 177.1474, GNorm = 0.1969, lr_0 = 3.1599e-04
Loss = 8.2121e-03, PNorm = 177.1588, GNorm = 0.1510, lr_0 = 3.1577e-04
Loss = 1.0453e-02, PNorm = 177.1704, GNorm = 0.2338, lr_0 = 3.1556e-04
Loss = 9.6835e-03, PNorm = 177.1793, GNorm = 0.1766, lr_0 = 3.1534e-04
Loss = 1.0780e-02, PNorm = 177.1879, GNorm = 0.1909, lr_0 = 3.1512e-04
Loss = 8.5830e-03, PNorm = 177.1951, GNorm = 0.2609, lr_0 = 3.1491e-04
Loss = 1.4718e-02, PNorm = 177.2049, GNorm = 0.4917, lr_0 = 3.1469e-04
Loss = 1.0445e-02, PNorm = 177.2143, GNorm = 0.1276, lr_0 = 3.1448e-04
Loss = 8.0637e-03, PNorm = 177.2233, GNorm = 0.2769, lr_0 = 3.1426e-04
Loss = 1.1915e-02, PNorm = 177.2352, GNorm = 0.2241, lr_0 = 3.1405e-04
Loss = 9.3377e-03, PNorm = 177.2460, GNorm = 0.2998, lr_0 = 3.1383e-04
Loss = 1.3446e-02, PNorm = 177.2542, GNorm = 0.5550, lr_0 = 3.1362e-04
Loss = 1.3440e-02, PNorm = 177.2644, GNorm = 0.1731, lr_0 = 3.1340e-04
Loss = 1.1696e-02, PNorm = 177.2744, GNorm = 0.2145, lr_0 = 3.1319e-04
Loss = 1.3098e-02, PNorm = 177.2848, GNorm = 0.2469, lr_0 = 3.1297e-04
Loss = 1.1633e-02, PNorm = 177.2945, GNorm = 0.2928, lr_0 = 3.1276e-04
Loss = 9.0598e-03, PNorm = 177.3047, GNorm = 0.3937, lr_0 = 3.1254e-04
Loss = 8.4217e-03, PNorm = 177.3160, GNorm = 0.3892, lr_0 = 3.1233e-04
Loss = 1.3413e-02, PNorm = 177.3261, GNorm = 0.2780, lr_0 = 3.1212e-04
Loss = 8.8467e-03, PNorm = 177.3356, GNorm = 0.2149, lr_0 = 3.1190e-04
Loss = 1.0319e-02, PNorm = 177.3464, GNorm = 0.2581, lr_0 = 3.1169e-04
Loss = 9.6862e-03, PNorm = 177.3591, GNorm = 0.3296, lr_0 = 3.1147e-04
Loss = 8.2711e-03, PNorm = 177.3717, GNorm = 0.2232, lr_0 = 3.1126e-04
Loss = 1.2923e-02, PNorm = 177.3818, GNorm = 0.5461, lr_0 = 3.1105e-04
Loss = 8.3809e-03, PNorm = 177.3929, GNorm = 0.1743, lr_0 = 3.1083e-04
Loss = 1.1448e-02, PNorm = 177.4016, GNorm = 0.3626, lr_0 = 3.1062e-04
Loss = 9.5828e-03, PNorm = 177.4120, GNorm = 0.2601, lr_0 = 3.1041e-04
Loss = 9.5039e-03, PNorm = 177.4202, GNorm = 0.1480, lr_0 = 3.1020e-04
Loss = 1.0995e-02, PNorm = 177.4266, GNorm = 0.3970, lr_0 = 3.0998e-04
Loss = 1.8629e-02, PNorm = 177.4367, GNorm = 0.2921, lr_0 = 3.0977e-04
Loss = 9.5724e-03, PNorm = 177.4477, GNorm = 0.1292, lr_0 = 3.0956e-04
Loss = 9.6154e-03, PNorm = 177.4571, GNorm = 0.2768, lr_0 = 3.0935e-04
Loss = 1.3345e-02, PNorm = 177.4669, GNorm = 0.1797, lr_0 = 3.0914e-04
Loss = 1.3576e-02, PNorm = 177.4741, GNorm = 0.2037, lr_0 = 3.0892e-04
Loss = 1.0213e-02, PNorm = 177.4834, GNorm = 0.3025, lr_0 = 3.0871e-04
Loss = 8.5557e-03, PNorm = 177.4948, GNorm = 0.2903, lr_0 = 3.0850e-04
Loss = 9.6598e-03, PNorm = 177.5023, GNorm = 0.1222, lr_0 = 3.0829e-04
Loss = 9.8469e-03, PNorm = 177.5095, GNorm = 0.2763, lr_0 = 3.0808e-04
Loss = 9.5407e-03, PNorm = 177.5208, GNorm = 0.5287, lr_0 = 3.0787e-04
Loss = 1.3262e-02, PNorm = 177.5319, GNorm = 0.3107, lr_0 = 3.0766e-04
Loss = 1.4335e-02, PNorm = 177.5450, GNorm = 0.1753, lr_0 = 3.0745e-04
Loss = 8.6764e-03, PNorm = 177.5577, GNorm = 0.5224, lr_0 = 3.0723e-04
Loss = 1.3532e-02, PNorm = 177.5670, GNorm = 0.3597, lr_0 = 3.0702e-04
Loss = 1.4240e-02, PNorm = 177.5780, GNorm = 0.7172, lr_0 = 3.0681e-04
Loss = 1.0421e-02, PNorm = 177.5876, GNorm = 0.2129, lr_0 = 3.0660e-04
Loss = 8.3449e-03, PNorm = 177.5974, GNorm = 0.1524, lr_0 = 3.0639e-04
Loss = 1.5789e-02, PNorm = 177.6107, GNorm = 0.2200, lr_0 = 3.0618e-04
Loss = 9.0911e-03, PNorm = 177.6231, GNorm = 0.1706, lr_0 = 3.0597e-04
Loss = 1.2570e-02, PNorm = 177.6378, GNorm = 0.2352, lr_0 = 3.0576e-04
Loss = 1.0660e-02, PNorm = 177.6512, GNorm = 0.1791, lr_0 = 3.0555e-04
Loss = 1.3750e-02, PNorm = 177.6635, GNorm = 0.1946, lr_0 = 3.0535e-04
Loss = 8.7340e-03, PNorm = 177.6740, GNorm = 0.4135, lr_0 = 3.0514e-04
Loss = 1.1166e-02, PNorm = 177.6844, GNorm = 0.6707, lr_0 = 3.0493e-04
Loss = 8.4767e-03, PNorm = 177.6970, GNorm = 0.2738, lr_0 = 3.0472e-04
Loss = 1.1742e-02, PNorm = 177.7066, GNorm = 0.1414, lr_0 = 3.0451e-04
Loss = 9.4630e-03, PNorm = 177.7182, GNorm = 0.1235, lr_0 = 3.0430e-04
Loss = 9.1658e-03, PNorm = 177.7295, GNorm = 0.2902, lr_0 = 3.0409e-04
Loss = 1.0091e-02, PNorm = 177.7400, GNorm = 0.1516, lr_0 = 3.0388e-04
Loss = 7.6669e-03, PNorm = 177.7491, GNorm = 0.1907, lr_0 = 3.0368e-04
Loss = 9.5742e-03, PNorm = 177.7582, GNorm = 0.3429, lr_0 = 3.0347e-04
Loss = 1.1501e-02, PNorm = 177.7696, GNorm = 0.4580, lr_0 = 3.0326e-04
Loss = 8.8953e-03, PNorm = 177.7815, GNorm = 0.2114, lr_0 = 3.0305e-04
Loss = 8.5929e-03, PNorm = 177.7911, GNorm = 0.2627, lr_0 = 3.0284e-04
Loss = 1.2178e-02, PNorm = 177.8003, GNorm = 0.3984, lr_0 = 3.0264e-04
Loss = 1.1523e-02, PNorm = 177.8123, GNorm = 0.1658, lr_0 = 3.0243e-04
Loss = 1.3648e-02, PNorm = 177.8225, GNorm = 0.3474, lr_0 = 3.0222e-04
Loss = 9.9963e-03, PNorm = 177.8365, GNorm = 0.1259, lr_0 = 3.0202e-04
Loss = 1.1108e-02, PNorm = 177.8511, GNorm = 0.2512, lr_0 = 3.0181e-04
Loss = 1.1868e-02, PNorm = 177.8620, GNorm = 0.2199, lr_0 = 3.0160e-04
Loss = 7.8110e-03, PNorm = 177.8739, GNorm = 0.4256, lr_0 = 3.0140e-04
Loss = 1.4254e-02, PNorm = 177.8851, GNorm = 0.1212, lr_0 = 3.0119e-04
Loss = 9.8983e-03, PNorm = 177.8927, GNorm = 0.2700, lr_0 = 3.0098e-04
Loss = 1.1543e-02, PNorm = 177.9025, GNorm = 0.1641, lr_0 = 3.0078e-04
Loss = 1.5505e-02, PNorm = 177.9124, GNorm = 0.1309, lr_0 = 3.0057e-04
Loss = 1.1715e-02, PNorm = 177.9221, GNorm = 0.2062, lr_0 = 3.0036e-04
Loss = 8.8584e-03, PNorm = 177.9340, GNorm = 0.2671, lr_0 = 3.0016e-04
Loss = 1.2691e-02, PNorm = 177.9456, GNorm = 0.2057, lr_0 = 2.9995e-04
Loss = 7.9136e-03, PNorm = 177.9587, GNorm = 0.1697, lr_0 = 2.9975e-04
Loss = 1.2250e-02, PNorm = 177.9710, GNorm = 0.2444, lr_0 = 2.9954e-04
Loss = 8.9931e-03, PNorm = 177.9819, GNorm = 0.1690, lr_0 = 2.9934e-04
Loss = 1.4450e-02, PNorm = 177.9914, GNorm = 0.2335, lr_0 = 2.9913e-04
Loss = 1.2571e-02, PNorm = 178.0031, GNorm = 0.2819, lr_0 = 2.9893e-04
Loss = 1.4574e-02, PNorm = 178.0126, GNorm = 0.1377, lr_0 = 2.9872e-04
Loss = 1.4632e-02, PNorm = 178.0237, GNorm = 0.1998, lr_0 = 2.9852e-04
Loss = 9.5599e-03, PNorm = 178.0381, GNorm = 0.2676, lr_0 = 2.9831e-04
Loss = 8.8808e-03, PNorm = 178.0494, GNorm = 0.1451, lr_0 = 2.9811e-04
Loss = 1.0658e-02, PNorm = 178.0607, GNorm = 0.1338, lr_0 = 2.9790e-04
Loss = 1.0272e-02, PNorm = 178.0720, GNorm = 0.2099, lr_0 = 2.9770e-04
Loss = 1.0324e-02, PNorm = 178.0815, GNorm = 0.2953, lr_0 = 2.9750e-04
Loss = 1.5562e-02, PNorm = 178.0924, GNorm = 0.1419, lr_0 = 2.9729e-04
Loss = 7.3626e-03, PNorm = 178.1040, GNorm = 0.1923, lr_0 = 2.9709e-04
Loss = 1.2492e-02, PNorm = 178.1161, GNorm = 0.2015, lr_0 = 2.9689e-04
Loss = 9.3038e-03, PNorm = 178.1305, GNorm = 0.1321, lr_0 = 2.9668e-04
Loss = 9.7341e-03, PNorm = 178.1424, GNorm = 0.1984, lr_0 = 2.9648e-04
Loss = 8.5672e-03, PNorm = 178.1530, GNorm = 0.2518, lr_0 = 2.9628e-04
Loss = 7.3057e-03, PNorm = 178.1618, GNorm = 0.1482, lr_0 = 2.9607e-04
Loss = 9.8317e-03, PNorm = 178.1710, GNorm = 0.3618, lr_0 = 2.9587e-04
Loss = 1.1133e-02, PNorm = 178.1839, GNorm = 0.2168, lr_0 = 2.9567e-04
Loss = 8.6484e-03, PNorm = 178.1954, GNorm = 0.2309, lr_0 = 2.9546e-04
Loss = 1.4345e-02, PNorm = 178.2080, GNorm = 0.3783, lr_0 = 2.9526e-04
Loss = 2.0549e-02, PNorm = 178.2190, GNorm = 0.3502, lr_0 = 2.9506e-04
Loss = 1.0801e-02, PNorm = 178.2290, GNorm = 0.1282, lr_0 = 2.9486e-04
Loss = 1.2035e-02, PNorm = 178.2392, GNorm = 0.1571, lr_0 = 2.9466e-04
Loss = 1.0775e-02, PNorm = 178.2531, GNorm = 0.2478, lr_0 = 2.9445e-04
Loss = 7.8513e-03, PNorm = 178.2645, GNorm = 0.1137, lr_0 = 2.9425e-04
Loss = 8.0722e-03, PNorm = 178.2758, GNorm = 0.2382, lr_0 = 2.9405e-04
Loss = 9.5485e-03, PNorm = 178.2855, GNorm = 0.3117, lr_0 = 2.9385e-04
Loss = 1.2227e-02, PNorm = 178.2932, GNorm = 0.1383, lr_0 = 2.9365e-04
Loss = 9.4420e-03, PNorm = 178.3033, GNorm = 0.1422, lr_0 = 2.9345e-04
Loss = 7.4198e-03, PNorm = 178.3147, GNorm = 0.2671, lr_0 = 2.9325e-04
Loss = 1.2808e-02, PNorm = 178.3261, GNorm = 0.0958, lr_0 = 2.9305e-04
Loss = 9.8595e-03, PNorm = 178.3402, GNorm = 0.1628, lr_0 = 2.9284e-04
Loss = 9.9737e-03, PNorm = 178.3529, GNorm = 0.3052, lr_0 = 2.9264e-04
Loss = 1.0167e-02, PNorm = 178.3650, GNorm = 0.2755, lr_0 = 2.9244e-04
Loss = 1.0889e-02, PNorm = 178.3734, GNorm = 0.1886, lr_0 = 2.9224e-04
Loss = 1.1540e-02, PNorm = 178.3826, GNorm = 0.1076, lr_0 = 2.9204e-04
Loss = 1.4059e-02, PNorm = 178.3924, GNorm = 0.2007, lr_0 = 2.9184e-04
Loss = 1.0222e-02, PNorm = 178.4071, GNorm = 0.2146, lr_0 = 2.9164e-04
Loss = 1.2064e-02, PNorm = 178.4201, GNorm = 0.4422, lr_0 = 2.9144e-04
Loss = 9.5577e-03, PNorm = 178.4327, GNorm = 0.2772, lr_0 = 2.9124e-04
Validation mae = 0.120991
Epoch 17
Loss = 6.8676e-03, PNorm = 178.4420, GNorm = 0.2058, lr_0 = 2.9104e-04
Loss = 7.7051e-03, PNorm = 178.4493, GNorm = 0.1432, lr_0 = 2.9084e-04
Loss = 9.2908e-03, PNorm = 178.4565, GNorm = 0.2313, lr_0 = 2.9065e-04
Loss = 9.6917e-03, PNorm = 178.4642, GNorm = 1.5015, lr_0 = 2.9045e-04
Loss = 1.2773e-02, PNorm = 178.4764, GNorm = 0.2154, lr_0 = 2.9025e-04
Loss = 8.5121e-03, PNorm = 178.4838, GNorm = 0.2898, lr_0 = 2.9005e-04
Loss = 9.5510e-03, PNorm = 178.4898, GNorm = 0.1115, lr_0 = 2.8985e-04
Loss = 1.0259e-02, PNorm = 178.4981, GNorm = 0.2022, lr_0 = 2.8965e-04
Loss = 8.1785e-03, PNorm = 178.5086, GNorm = 0.3489, lr_0 = 2.8945e-04
Loss = 8.3589e-03, PNorm = 178.5171, GNorm = 0.3479, lr_0 = 2.8925e-04
Loss = 8.9760e-03, PNorm = 178.5230, GNorm = 0.1677, lr_0 = 2.8906e-04
Loss = 2.2571e-02, PNorm = 178.5313, GNorm = 0.3117, lr_0 = 2.8886e-04
Loss = 7.7650e-03, PNorm = 178.5370, GNorm = 0.3695, lr_0 = 2.8866e-04
Loss = 9.5190e-03, PNorm = 178.5464, GNorm = 0.2152, lr_0 = 2.8846e-04
Loss = 6.7687e-03, PNorm = 178.5565, GNorm = 0.1699, lr_0 = 2.8826e-04
Loss = 9.4713e-03, PNorm = 178.5652, GNorm = 0.7194, lr_0 = 2.8807e-04
Loss = 8.3337e-03, PNorm = 178.5736, GNorm = 0.2531, lr_0 = 2.8787e-04
Loss = 7.6979e-03, PNorm = 178.5815, GNorm = 0.1830, lr_0 = 2.8767e-04
Loss = 7.8105e-03, PNorm = 178.5898, GNorm = 0.1729, lr_0 = 2.8748e-04
Loss = 9.5381e-03, PNorm = 178.5991, GNorm = 0.4269, lr_0 = 2.8728e-04
Loss = 6.5274e-03, PNorm = 178.6082, GNorm = 0.1251, lr_0 = 2.8708e-04
Loss = 1.0535e-02, PNorm = 178.6165, GNorm = 0.1213, lr_0 = 2.8689e-04
Loss = 7.7315e-03, PNorm = 178.6243, GNorm = 0.1711, lr_0 = 2.8669e-04
Loss = 9.0504e-03, PNorm = 178.6324, GNorm = 0.2406, lr_0 = 2.8649e-04
Loss = 8.0344e-03, PNorm = 178.6421, GNorm = 0.1235, lr_0 = 2.8630e-04
Loss = 9.2652e-03, PNorm = 178.6516, GNorm = 0.2466, lr_0 = 2.8610e-04
Loss = 8.6806e-03, PNorm = 178.6591, GNorm = 0.5137, lr_0 = 2.8590e-04
Loss = 1.1537e-02, PNorm = 178.6679, GNorm = 0.2571, lr_0 = 2.8571e-04
Loss = 8.8840e-03, PNorm = 178.6747, GNorm = 0.3996, lr_0 = 2.8551e-04
Loss = 1.0219e-02, PNorm = 178.6835, GNorm = 0.3860, lr_0 = 2.8532e-04
Loss = 6.7253e-03, PNorm = 178.6940, GNorm = 0.3332, lr_0 = 2.8512e-04
Loss = 1.2906e-02, PNorm = 178.7046, GNorm = 0.1758, lr_0 = 2.8493e-04
Loss = 1.0713e-02, PNorm = 178.7147, GNorm = 0.2872, lr_0 = 2.8473e-04
Loss = 1.1798e-02, PNorm = 178.7217, GNorm = 0.1805, lr_0 = 2.8454e-04
Loss = 1.2508e-02, PNorm = 178.7312, GNorm = 0.1344, lr_0 = 2.8434e-04
Loss = 7.3380e-03, PNorm = 178.7398, GNorm = 0.2137, lr_0 = 2.8415e-04
Loss = 7.9833e-03, PNorm = 178.7489, GNorm = 0.1534, lr_0 = 2.8395e-04
Loss = 7.1877e-03, PNorm = 178.7577, GNorm = 0.2363, lr_0 = 2.8376e-04
Loss = 9.6971e-03, PNorm = 178.7657, GNorm = 0.1571, lr_0 = 2.8356e-04
Loss = 6.5126e-03, PNorm = 178.7747, GNorm = 0.1309, lr_0 = 2.8337e-04
Loss = 1.0194e-02, PNorm = 178.7837, GNorm = 0.3585, lr_0 = 2.8317e-04
Loss = 1.1094e-02, PNorm = 178.7930, GNorm = 0.1388, lr_0 = 2.8298e-04
Loss = 7.3386e-03, PNorm = 178.8002, GNorm = 0.2148, lr_0 = 2.8279e-04
Loss = 1.2158e-02, PNorm = 178.8085, GNorm = 0.3676, lr_0 = 2.8259e-04
Loss = 8.5526e-03, PNorm = 178.8178, GNorm = 0.2454, lr_0 = 2.8240e-04
Loss = 9.6048e-03, PNorm = 178.8268, GNorm = 0.1922, lr_0 = 2.8221e-04
Loss = 1.1117e-02, PNorm = 178.8360, GNorm = 0.1196, lr_0 = 2.8201e-04
Loss = 7.5272e-03, PNorm = 178.8457, GNorm = 0.3300, lr_0 = 2.8182e-04
Loss = 7.5970e-03, PNorm = 178.8548, GNorm = 0.2282, lr_0 = 2.8163e-04
Loss = 7.5067e-03, PNorm = 178.8636, GNorm = 0.2154, lr_0 = 2.8143e-04
Loss = 8.1275e-03, PNorm = 178.8716, GNorm = 0.3634, lr_0 = 2.8124e-04
Loss = 7.3258e-03, PNorm = 178.8789, GNorm = 0.2567, lr_0 = 2.8105e-04
Loss = 7.2360e-03, PNorm = 178.8859, GNorm = 0.3320, lr_0 = 2.8085e-04
Loss = 1.3040e-02, PNorm = 178.8927, GNorm = 1.4509, lr_0 = 2.8066e-04
Loss = 1.7218e-02, PNorm = 178.8998, GNorm = 0.1905, lr_0 = 2.8047e-04
Loss = 6.4825e-03, PNorm = 178.9095, GNorm = 0.2185, lr_0 = 2.8028e-04
Loss = 1.1093e-02, PNorm = 178.9170, GNorm = 0.2819, lr_0 = 2.8009e-04
Loss = 9.0525e-03, PNorm = 178.9251, GNorm = 0.5688, lr_0 = 2.7989e-04
Loss = 8.8550e-03, PNorm = 178.9362, GNorm = 0.1787, lr_0 = 2.7970e-04
Loss = 9.9635e-03, PNorm = 178.9447, GNorm = 0.9720, lr_0 = 2.7951e-04
Loss = 6.4805e-03, PNorm = 178.9541, GNorm = 0.1540, lr_0 = 2.7932e-04
Loss = 8.0214e-03, PNorm = 178.9644, GNorm = 0.2186, lr_0 = 2.7913e-04
Loss = 1.3481e-02, PNorm = 178.9712, GNorm = 0.5203, lr_0 = 2.7894e-04
Loss = 1.2181e-02, PNorm = 178.9835, GNorm = 0.1376, lr_0 = 2.7875e-04
Loss = 8.7119e-03, PNorm = 178.9916, GNorm = 0.2950, lr_0 = 2.7855e-04
Loss = 7.2939e-03, PNorm = 179.0010, GNorm = 0.2327, lr_0 = 2.7836e-04
Loss = 1.2200e-02, PNorm = 179.0098, GNorm = 0.2940, lr_0 = 2.7817e-04
Loss = 1.7345e-02, PNorm = 179.0206, GNorm = 0.3513, lr_0 = 2.7798e-04
Loss = 7.7179e-03, PNorm = 179.0318, GNorm = 0.2496, lr_0 = 2.7779e-04
Loss = 1.0097e-02, PNorm = 179.0412, GNorm = 0.5064, lr_0 = 2.7760e-04
Loss = 7.8104e-03, PNorm = 179.0507, GNorm = 0.1622, lr_0 = 2.7741e-04
Loss = 7.0043e-03, PNorm = 179.0612, GNorm = 0.2226, lr_0 = 2.7722e-04
Loss = 8.8063e-03, PNorm = 179.0715, GNorm = 0.2262, lr_0 = 2.7703e-04
Loss = 8.3692e-03, PNorm = 179.0807, GNorm = 0.1049, lr_0 = 2.7684e-04
Loss = 9.0691e-03, PNorm = 179.0876, GNorm = 0.1663, lr_0 = 2.7665e-04
Loss = 6.7776e-03, PNorm = 179.0971, GNorm = 0.1204, lr_0 = 2.7646e-04
Loss = 6.9605e-03, PNorm = 179.1056, GNorm = 0.1006, lr_0 = 2.7627e-04
Loss = 7.9007e-03, PNorm = 179.1165, GNorm = 0.3939, lr_0 = 2.7608e-04
Loss = 8.4213e-03, PNorm = 179.1286, GNorm = 0.3630, lr_0 = 2.7590e-04
Loss = 7.9740e-03, PNorm = 179.1391, GNorm = 0.1173, lr_0 = 2.7571e-04
Loss = 9.3687e-03, PNorm = 179.1461, GNorm = 0.1441, lr_0 = 2.7552e-04
Loss = 6.7706e-03, PNorm = 179.1555, GNorm = 0.2359, lr_0 = 2.7533e-04
Loss = 6.1314e-03, PNorm = 179.1647, GNorm = 0.1536, lr_0 = 2.7514e-04
Loss = 1.1901e-02, PNorm = 179.1737, GNorm = 0.2103, lr_0 = 2.7495e-04
Loss = 9.1143e-03, PNorm = 179.1808, GNorm = 0.1719, lr_0 = 2.7476e-04
Loss = 1.2207e-02, PNorm = 179.1902, GNorm = 0.1661, lr_0 = 2.7457e-04
Loss = 1.0951e-02, PNorm = 179.2012, GNorm = 0.3564, lr_0 = 2.7439e-04
Loss = 8.2379e-03, PNorm = 179.2136, GNorm = 0.6977, lr_0 = 2.7420e-04
Loss = 7.7622e-03, PNorm = 179.2244, GNorm = 0.3791, lr_0 = 2.7401e-04
Loss = 9.7834e-03, PNorm = 179.2307, GNorm = 0.1531, lr_0 = 2.7382e-04
Loss = 1.1832e-02, PNorm = 179.2386, GNorm = 0.1349, lr_0 = 2.7364e-04
Loss = 2.2767e-02, PNorm = 179.2460, GNorm = 0.3202, lr_0 = 2.7345e-04
Loss = 1.1123e-02, PNorm = 179.2552, GNorm = 0.1331, lr_0 = 2.7326e-04
Loss = 8.2270e-03, PNorm = 179.2657, GNorm = 0.1129, lr_0 = 2.7307e-04
Loss = 8.5714e-03, PNorm = 179.2780, GNorm = 0.2137, lr_0 = 2.7289e-04
Loss = 9.0729e-03, PNorm = 179.2891, GNorm = 0.2992, lr_0 = 2.7270e-04
Loss = 1.1045e-02, PNorm = 179.2979, GNorm = 0.1885, lr_0 = 2.7251e-04
Loss = 1.0413e-02, PNorm = 179.3062, GNorm = 0.1417, lr_0 = 2.7233e-04
Loss = 9.5278e-03, PNorm = 179.3158, GNorm = 0.4389, lr_0 = 2.7214e-04
Loss = 9.5300e-03, PNorm = 179.3217, GNorm = 0.1960, lr_0 = 2.7195e-04
Loss = 8.5666e-03, PNorm = 179.3302, GNorm = 0.7098, lr_0 = 2.7177e-04
Loss = 8.3984e-03, PNorm = 179.3387, GNorm = 0.2519, lr_0 = 2.7158e-04
Loss = 1.1256e-02, PNorm = 179.3468, GNorm = 0.2682, lr_0 = 2.7139e-04
Loss = 1.1231e-02, PNorm = 179.3563, GNorm = 0.1478, lr_0 = 2.7121e-04
Loss = 8.0452e-03, PNorm = 179.3645, GNorm = 0.1834, lr_0 = 2.7102e-04
Loss = 1.1034e-02, PNorm = 179.3744, GNorm = 0.1296, lr_0 = 2.7084e-04
Loss = 7.1559e-03, PNorm = 179.3822, GNorm = 0.1738, lr_0 = 2.7065e-04
Loss = 1.2130e-02, PNorm = 179.3912, GNorm = 0.5919, lr_0 = 2.7047e-04
Loss = 6.8424e-03, PNorm = 179.3994, GNorm = 0.1673, lr_0 = 2.7028e-04
Loss = 7.0479e-03, PNorm = 179.4085, GNorm = 0.3560, lr_0 = 2.7010e-04
Loss = 6.6009e-03, PNorm = 179.4181, GNorm = 0.1373, lr_0 = 2.6991e-04
Loss = 8.8091e-03, PNorm = 179.4273, GNorm = 0.1540, lr_0 = 2.6973e-04
Loss = 7.3914e-03, PNorm = 179.4394, GNorm = 0.1472, lr_0 = 2.6954e-04
Loss = 1.1831e-02, PNorm = 179.4511, GNorm = 0.6688, lr_0 = 2.6936e-04
Loss = 1.0126e-02, PNorm = 179.4612, GNorm = 0.1242, lr_0 = 2.6917e-04
Loss = 7.2865e-03, PNorm = 179.4730, GNorm = 0.3584, lr_0 = 2.6899e-04
Loss = 7.8658e-03, PNorm = 179.4861, GNorm = 0.1497, lr_0 = 2.6880e-04
Loss = 7.9338e-03, PNorm = 179.4979, GNorm = 0.1210, lr_0 = 2.6862e-04
Loss = 1.1395e-02, PNorm = 179.5076, GNorm = 0.1819, lr_0 = 2.6844e-04
Loss = 8.7077e-03, PNorm = 179.5166, GNorm = 0.1643, lr_0 = 2.6825e-04
Validation mae = 0.120969
Epoch 18
Loss = 6.5723e-03, PNorm = 179.5243, GNorm = 0.3564, lr_0 = 2.6807e-04
Loss = 5.9222e-03, PNorm = 179.5290, GNorm = 0.1114, lr_0 = 2.6788e-04
Loss = 6.6032e-03, PNorm = 179.5341, GNorm = 0.1657, lr_0 = 2.6770e-04
Loss = 1.1305e-02, PNorm = 179.5380, GNorm = 0.1502, lr_0 = 2.6752e-04
Loss = 6.2360e-03, PNorm = 179.5441, GNorm = 0.2974, lr_0 = 2.6733e-04
Loss = 7.2204e-03, PNorm = 179.5513, GNorm = 0.1945, lr_0 = 2.6715e-04
Loss = 6.3680e-03, PNorm = 179.5564, GNorm = 0.2004, lr_0 = 2.6697e-04
Loss = 9.6257e-03, PNorm = 179.5621, GNorm = 0.2664, lr_0 = 2.6678e-04
Loss = 6.2338e-03, PNorm = 179.5686, GNorm = 0.3801, lr_0 = 2.6660e-04
Loss = 1.2309e-02, PNorm = 179.5760, GNorm = 0.1970, lr_0 = 2.6642e-04
Loss = 8.5768e-03, PNorm = 179.5812, GNorm = 0.1683, lr_0 = 2.6624e-04
Loss = 7.6410e-03, PNorm = 179.5886, GNorm = 0.2265, lr_0 = 2.6605e-04
Loss = 6.2486e-03, PNorm = 179.5949, GNorm = 0.3419, lr_0 = 2.6587e-04
Loss = 9.3637e-03, PNorm = 179.6002, GNorm = 0.3968, lr_0 = 2.6569e-04
Loss = 6.8728e-03, PNorm = 179.6085, GNorm = 0.1944, lr_0 = 2.6551e-04
Loss = 8.8566e-03, PNorm = 179.6151, GNorm = 0.1656, lr_0 = 2.6533e-04
Loss = 7.3004e-03, PNorm = 179.6233, GNorm = 0.0901, lr_0 = 2.6514e-04
Loss = 5.7679e-03, PNorm = 179.6322, GNorm = 0.2262, lr_0 = 2.6496e-04
Loss = 5.8620e-03, PNorm = 179.6386, GNorm = 0.2120, lr_0 = 2.6478e-04
Loss = 9.6111e-03, PNorm = 179.6457, GNorm = 0.1702, lr_0 = 2.6460e-04
Loss = 1.2593e-02, PNorm = 179.6515, GNorm = 0.3220, lr_0 = 2.6442e-04
Loss = 4.9749e-03, PNorm = 179.6582, GNorm = 0.1781, lr_0 = 2.6424e-04
Loss = 1.0097e-02, PNorm = 179.6674, GNorm = 0.0970, lr_0 = 2.6406e-04
Loss = 7.8191e-03, PNorm = 179.6776, GNorm = 0.3130, lr_0 = 2.6388e-04
Loss = 5.5137e-03, PNorm = 179.6852, GNorm = 0.1926, lr_0 = 2.6369e-04
Loss = 7.3967e-03, PNorm = 179.6920, GNorm = 0.2725, lr_0 = 2.6351e-04
Loss = 7.2663e-03, PNorm = 179.6977, GNorm = 0.3087, lr_0 = 2.6333e-04
Loss = 8.4950e-03, PNorm = 179.7032, GNorm = 0.1082, lr_0 = 2.6315e-04
Loss = 8.8324e-03, PNorm = 179.7091, GNorm = 0.1799, lr_0 = 2.6297e-04
Loss = 6.7898e-03, PNorm = 179.7166, GNorm = 0.2170, lr_0 = 2.6279e-04
Loss = 1.2132e-02, PNorm = 179.7237, GNorm = 0.2670, lr_0 = 2.6261e-04
Loss = 8.5514e-03, PNorm = 179.7308, GNorm = 0.1321, lr_0 = 2.6243e-04
Loss = 1.1752e-02, PNorm = 179.7382, GNorm = 0.1948, lr_0 = 2.6225e-04
Loss = 1.0451e-02, PNorm = 179.7433, GNorm = 0.1201, lr_0 = 2.6207e-04
Loss = 6.4282e-03, PNorm = 179.7504, GNorm = 0.2003, lr_0 = 2.6189e-04
Loss = 5.9290e-03, PNorm = 179.7591, GNorm = 0.2511, lr_0 = 2.6171e-04
Loss = 5.0485e-03, PNorm = 179.7656, GNorm = 0.1588, lr_0 = 2.6153e-04
Loss = 6.8286e-03, PNorm = 179.7709, GNorm = 0.1475, lr_0 = 2.6136e-04
Loss = 1.0848e-02, PNorm = 179.7767, GNorm = 0.2848, lr_0 = 2.6118e-04
Loss = 6.2303e-03, PNorm = 179.7862, GNorm = 0.1787, lr_0 = 2.6100e-04
Loss = 5.7561e-03, PNorm = 179.7949, GNorm = 0.3453, lr_0 = 2.6082e-04
Loss = 6.3180e-03, PNorm = 179.8025, GNorm = 0.1255, lr_0 = 2.6064e-04
Loss = 7.6502e-03, PNorm = 179.8096, GNorm = 0.1514, lr_0 = 2.6046e-04
Loss = 6.4103e-03, PNorm = 179.8192, GNorm = 0.2795, lr_0 = 2.6028e-04
Loss = 7.3065e-03, PNorm = 179.8277, GNorm = 0.2028, lr_0 = 2.6011e-04
Loss = 1.2341e-02, PNorm = 179.8369, GNorm = 0.2687, lr_0 = 2.5993e-04
Loss = 9.6480e-03, PNorm = 179.8445, GNorm = 0.3872, lr_0 = 2.5975e-04
Loss = 5.7410e-03, PNorm = 179.8532, GNorm = 0.1375, lr_0 = 2.5957e-04
Loss = 8.3516e-03, PNorm = 179.8606, GNorm = 0.2070, lr_0 = 2.5939e-04
Loss = 1.8983e-02, PNorm = 179.8683, GNorm = 0.3034, lr_0 = 2.5922e-04
Loss = 8.1809e-03, PNorm = 179.8750, GNorm = 0.2223, lr_0 = 2.5904e-04
Loss = 7.1736e-03, PNorm = 179.8860, GNorm = 0.2475, lr_0 = 2.5886e-04
Loss = 7.8854e-03, PNorm = 179.8925, GNorm = 0.2699, lr_0 = 2.5868e-04
Loss = 7.3694e-03, PNorm = 179.8995, GNorm = 0.2235, lr_0 = 2.5851e-04
Loss = 1.0824e-02, PNorm = 179.9076, GNorm = 0.2916, lr_0 = 2.5833e-04
Loss = 6.4130e-03, PNorm = 179.9159, GNorm = 0.1092, lr_0 = 2.5815e-04
Loss = 1.1066e-02, PNorm = 179.9228, GNorm = 0.2262, lr_0 = 2.5797e-04
Loss = 6.4252e-03, PNorm = 179.9333, GNorm = 0.1350, lr_0 = 2.5780e-04
Loss = 1.1782e-02, PNorm = 179.9414, GNorm = 0.3804, lr_0 = 2.5762e-04
Loss = 9.2999e-03, PNorm = 179.9485, GNorm = 0.2116, lr_0 = 2.5745e-04
Loss = 6.0921e-03, PNorm = 179.9590, GNorm = 0.2984, lr_0 = 2.5727e-04
Loss = 8.3919e-03, PNorm = 179.9698, GNorm = 0.2109, lr_0 = 2.5709e-04
Loss = 1.4054e-02, PNorm = 179.9756, GNorm = 0.1436, lr_0 = 2.5692e-04
Loss = 9.5969e-03, PNorm = 179.9790, GNorm = 0.1445, lr_0 = 2.5674e-04
Loss = 7.0397e-03, PNorm = 179.9819, GNorm = 0.1262, lr_0 = 2.5656e-04
Loss = 6.4626e-03, PNorm = 179.9870, GNorm = 0.3509, lr_0 = 2.5639e-04
Loss = 7.4430e-03, PNorm = 179.9946, GNorm = 0.2547, lr_0 = 2.5621e-04
Loss = 8.5500e-03, PNorm = 180.0027, GNorm = 0.0829, lr_0 = 2.5604e-04
Loss = 6.2071e-03, PNorm = 180.0104, GNorm = 0.1133, lr_0 = 2.5586e-04
Loss = 6.3081e-03, PNorm = 180.0195, GNorm = 0.0941, lr_0 = 2.5569e-04
Loss = 1.1173e-02, PNorm = 180.0287, GNorm = 0.2745, lr_0 = 2.5551e-04
Loss = 7.7078e-03, PNorm = 180.0365, GNorm = 0.1662, lr_0 = 2.5534e-04
Loss = 1.5497e-02, PNorm = 180.0422, GNorm = 0.1520, lr_0 = 2.5516e-04
Loss = 6.5681e-03, PNorm = 180.0489, GNorm = 0.1172, lr_0 = 2.5499e-04
Loss = 6.1362e-03, PNorm = 180.0564, GNorm = 0.1143, lr_0 = 2.5481e-04
Loss = 6.3948e-03, PNorm = 180.0637, GNorm = 0.1992, lr_0 = 2.5464e-04
Loss = 7.8890e-03, PNorm = 180.0707, GNorm = 0.1983, lr_0 = 2.5446e-04
Loss = 9.5208e-03, PNorm = 180.0790, GNorm = 0.3388, lr_0 = 2.5429e-04
Loss = 5.4818e-03, PNorm = 180.0894, GNorm = 0.1479, lr_0 = 2.5411e-04
Loss = 7.2013e-03, PNorm = 180.0997, GNorm = 0.1535, lr_0 = 2.5394e-04
Loss = 7.8554e-03, PNorm = 180.1095, GNorm = 0.2519, lr_0 = 2.5377e-04
Loss = 8.5817e-03, PNorm = 180.1197, GNorm = 0.1275, lr_0 = 2.5359e-04
Loss = 5.3120e-03, PNorm = 180.1258, GNorm = 0.1096, lr_0 = 2.5342e-04
Loss = 9.8114e-03, PNorm = 180.1313, GNorm = 0.1683, lr_0 = 2.5325e-04
Loss = 7.4652e-03, PNorm = 180.1367, GNorm = 0.1827, lr_0 = 2.5307e-04
Loss = 7.7873e-03, PNorm = 180.1433, GNorm = 0.2173, lr_0 = 2.5290e-04
Loss = 1.0191e-02, PNorm = 180.1482, GNorm = 0.1691, lr_0 = 2.5273e-04
Loss = 6.5415e-03, PNorm = 180.1556, GNorm = 0.1891, lr_0 = 2.5255e-04
Loss = 7.1548e-03, PNorm = 180.1643, GNorm = 0.1387, lr_0 = 2.5238e-04
Loss = 6.4754e-03, PNorm = 180.1711, GNorm = 0.1322, lr_0 = 2.5221e-04
Loss = 1.2723e-02, PNorm = 180.1770, GNorm = 0.1736, lr_0 = 2.5203e-04
Loss = 4.8966e-03, PNorm = 180.1842, GNorm = 0.1896, lr_0 = 2.5186e-04
Loss = 1.3992e-02, PNorm = 180.1923, GNorm = 0.3830, lr_0 = 2.5169e-04
Loss = 7.0459e-03, PNorm = 180.2017, GNorm = 0.4293, lr_0 = 2.5152e-04
Loss = 8.3233e-03, PNorm = 180.2111, GNorm = 0.1602, lr_0 = 2.5134e-04
Loss = 9.3796e-03, PNorm = 180.2210, GNorm = 0.1994, lr_0 = 2.5117e-04
Loss = 5.6936e-03, PNorm = 180.2300, GNorm = 0.4459, lr_0 = 2.5100e-04
Loss = 5.7438e-03, PNorm = 180.2375, GNorm = 0.1443, lr_0 = 2.5083e-04
Loss = 6.7543e-03, PNorm = 180.2457, GNorm = 0.1557, lr_0 = 2.5066e-04
Loss = 9.4717e-03, PNorm = 180.2536, GNorm = 0.2854, lr_0 = 2.5048e-04
Loss = 5.5354e-03, PNorm = 180.2607, GNorm = 0.1222, lr_0 = 2.5031e-04
Loss = 6.0743e-03, PNorm = 180.2697, GNorm = 0.1335, lr_0 = 2.5014e-04
Loss = 1.0645e-02, PNorm = 180.2791, GNorm = 0.2229, lr_0 = 2.4997e-04
Loss = 5.3042e-03, PNorm = 180.2904, GNorm = 0.1648, lr_0 = 2.4980e-04
Loss = 6.3076e-03, PNorm = 180.2986, GNorm = 0.1360, lr_0 = 2.4963e-04
Loss = 1.0466e-02, PNorm = 180.3082, GNorm = 0.2104, lr_0 = 2.4946e-04
Loss = 7.3151e-03, PNorm = 180.3146, GNorm = 0.1112, lr_0 = 2.4929e-04
Loss = 1.1343e-02, PNorm = 180.3219, GNorm = 0.3528, lr_0 = 2.4911e-04
Loss = 1.2820e-02, PNorm = 180.3297, GNorm = 0.2925, lr_0 = 2.4894e-04
Loss = 5.9878e-03, PNorm = 180.3390, GNorm = 0.2520, lr_0 = 2.4877e-04
Loss = 1.2925e-02, PNorm = 180.3482, GNorm = 0.2760, lr_0 = 2.4860e-04
Loss = 7.2149e-03, PNorm = 180.3556, GNorm = 0.3619, lr_0 = 2.4843e-04
Loss = 4.8664e-03, PNorm = 180.3641, GNorm = 0.2374, lr_0 = 2.4826e-04
Loss = 7.8817e-03, PNorm = 180.3729, GNorm = 0.2358, lr_0 = 2.4809e-04
Loss = 6.0856e-03, PNorm = 180.3811, GNorm = 0.1282, lr_0 = 2.4792e-04
Loss = 6.1836e-03, PNorm = 180.3882, GNorm = 0.1534, lr_0 = 2.4775e-04
Loss = 9.5317e-03, PNorm = 180.3944, GNorm = 0.1036, lr_0 = 2.4758e-04
Loss = 5.5398e-03, PNorm = 180.4014, GNorm = 0.1753, lr_0 = 2.4741e-04
Loss = 9.7683e-03, PNorm = 180.4123, GNorm = 0.1966, lr_0 = 2.4724e-04
Loss = 5.2754e-03, PNorm = 180.4212, GNorm = 0.2250, lr_0 = 2.4707e-04
Validation mae = 0.121167
Epoch 19
Loss = 5.7604e-03, PNorm = 180.4281, GNorm = 0.2794, lr_0 = 2.4690e-04
Loss = 5.2971e-03, PNorm = 180.4333, GNorm = 0.2836, lr_0 = 2.4674e-04
Loss = 8.3321e-03, PNorm = 180.4376, GNorm = 0.3482, lr_0 = 2.4657e-04
Loss = 1.9353e-02, PNorm = 180.4472, GNorm = 0.1821, lr_0 = 2.4640e-04
Loss = 7.6296e-03, PNorm = 180.4513, GNorm = 0.6132, lr_0 = 2.4623e-04
Loss = 7.5549e-03, PNorm = 180.4561, GNorm = 0.1616, lr_0 = 2.4606e-04
Loss = 7.5244e-03, PNorm = 180.4613, GNorm = 0.1522, lr_0 = 2.4589e-04
Loss = 6.8964e-03, PNorm = 180.4644, GNorm = 0.1690, lr_0 = 2.4572e-04
Loss = 6.6484e-03, PNorm = 180.4687, GNorm = 0.1880, lr_0 = 2.4556e-04
Loss = 6.2555e-03, PNorm = 180.4722, GNorm = 0.1075, lr_0 = 2.4539e-04
Loss = 4.8789e-03, PNorm = 180.4779, GNorm = 0.1149, lr_0 = 2.4522e-04
Loss = 6.0644e-03, PNorm = 180.4834, GNorm = 0.2857, lr_0 = 2.4505e-04
Loss = 5.0781e-03, PNorm = 180.4916, GNorm = 0.1110, lr_0 = 2.4488e-04
Loss = 1.1839e-02, PNorm = 180.4987, GNorm = 0.5707, lr_0 = 2.4472e-04
Loss = 5.6120e-03, PNorm = 180.5045, GNorm = 0.2110, lr_0 = 2.4455e-04
Loss = 8.9170e-03, PNorm = 180.5152, GNorm = 0.2005, lr_0 = 2.4438e-04
Loss = 1.4250e-02, PNorm = 180.5176, GNorm = 0.3307, lr_0 = 2.4421e-04
Loss = 6.3816e-03, PNorm = 180.5264, GNorm = 0.0988, lr_0 = 2.4405e-04
Loss = 6.2482e-03, PNorm = 180.5352, GNorm = 0.5339, lr_0 = 2.4388e-04
Loss = 6.3850e-03, PNorm = 180.5409, GNorm = 0.1342, lr_0 = 2.4371e-04
Loss = 6.1495e-03, PNorm = 180.5466, GNorm = 0.1886, lr_0 = 2.4354e-04
Loss = 1.1137e-02, PNorm = 180.5523, GNorm = 0.1923, lr_0 = 2.4338e-04
Loss = 8.6585e-03, PNorm = 180.5601, GNorm = 0.2512, lr_0 = 2.4321e-04
Loss = 6.2320e-03, PNorm = 180.5691, GNorm = 0.1352, lr_0 = 2.4304e-04
Loss = 5.3667e-03, PNorm = 180.5734, GNorm = 0.1796, lr_0 = 2.4288e-04
Loss = 5.4611e-03, PNorm = 180.5751, GNorm = 0.5392, lr_0 = 2.4271e-04
Loss = 6.6854e-03, PNorm = 180.5807, GNorm = 0.1144, lr_0 = 2.4254e-04
Loss = 5.1919e-03, PNorm = 180.5859, GNorm = 0.3443, lr_0 = 2.4238e-04
Loss = 5.6124e-03, PNorm = 180.5927, GNorm = 0.2523, lr_0 = 2.4221e-04
Loss = 1.0961e-02, PNorm = 180.5968, GNorm = 0.2038, lr_0 = 2.4205e-04
Loss = 8.8721e-03, PNorm = 180.6021, GNorm = 0.4390, lr_0 = 2.4188e-04
Loss = 6.3292e-03, PNorm = 180.6092, GNorm = 0.3474, lr_0 = 2.4171e-04
Loss = 7.2976e-03, PNorm = 180.6160, GNorm = 0.1130, lr_0 = 2.4155e-04
Loss = 5.5565e-03, PNorm = 180.6236, GNorm = 0.1935, lr_0 = 2.4138e-04
Loss = 5.0851e-03, PNorm = 180.6323, GNorm = 0.1544, lr_0 = 2.4122e-04
Loss = 6.5112e-03, PNorm = 180.6387, GNorm = 0.1081, lr_0 = 2.4105e-04
Loss = 8.7116e-03, PNorm = 180.6441, GNorm = 0.2877, lr_0 = 2.4089e-04
Loss = 5.8028e-03, PNorm = 180.6509, GNorm = 0.1439, lr_0 = 2.4072e-04
Loss = 8.1892e-03, PNorm = 180.6549, GNorm = 0.1642, lr_0 = 2.4056e-04
Loss = 4.1411e-03, PNorm = 180.6597, GNorm = 0.2240, lr_0 = 2.4039e-04
Loss = 8.6451e-03, PNorm = 180.6656, GNorm = 0.1360, lr_0 = 2.4023e-04
Loss = 5.9716e-03, PNorm = 180.6710, GNorm = 0.1080, lr_0 = 2.4006e-04
Loss = 8.7304e-03, PNorm = 180.6765, GNorm = 0.1680, lr_0 = 2.3990e-04
Loss = 6.6159e-03, PNorm = 180.6833, GNorm = 0.1487, lr_0 = 2.3974e-04
Loss = 6.2651e-03, PNorm = 180.6886, GNorm = 0.2284, lr_0 = 2.3957e-04
Loss = 9.5127e-03, PNorm = 180.6944, GNorm = 0.3056, lr_0 = 2.3941e-04
Loss = 7.8378e-03, PNorm = 180.7003, GNorm = 0.1348, lr_0 = 2.3924e-04
Loss = 7.5716e-03, PNorm = 180.7097, GNorm = 0.1044, lr_0 = 2.3908e-04
Loss = 4.7028e-03, PNorm = 180.7169, GNorm = 0.1490, lr_0 = 2.3892e-04
Loss = 6.0923e-03, PNorm = 180.7212, GNorm = 0.1881, lr_0 = 2.3875e-04
Loss = 1.0437e-02, PNorm = 180.7275, GNorm = 0.1562, lr_0 = 2.3859e-04
Loss = 5.8053e-03, PNorm = 180.7336, GNorm = 0.1874, lr_0 = 2.3842e-04
Loss = 5.0525e-03, PNorm = 180.7405, GNorm = 0.2614, lr_0 = 2.3826e-04
Loss = 6.2959e-03, PNorm = 180.7454, GNorm = 0.1515, lr_0 = 2.3810e-04
Loss = 6.0833e-03, PNorm = 180.7502, GNorm = 0.1422, lr_0 = 2.3794e-04
Loss = 5.1501e-03, PNorm = 180.7571, GNorm = 0.2024, lr_0 = 2.3777e-04
Loss = 6.7923e-03, PNorm = 180.7614, GNorm = 0.1659, lr_0 = 2.3761e-04
Loss = 1.1049e-02, PNorm = 180.7665, GNorm = 0.2821, lr_0 = 2.3745e-04
Loss = 7.7981e-03, PNorm = 180.7683, GNorm = 0.3463, lr_0 = 2.3728e-04
Loss = 1.2472e-02, PNorm = 180.7703, GNorm = 0.2590, lr_0 = 2.3712e-04
Loss = 7.5110e-03, PNorm = 180.7746, GNorm = 0.3811, lr_0 = 2.3696e-04
Loss = 5.4869e-03, PNorm = 180.7839, GNorm = 0.1011, lr_0 = 2.3680e-04
Loss = 6.1933e-03, PNorm = 180.7915, GNorm = 0.1583, lr_0 = 2.3663e-04
Loss = 4.8720e-03, PNorm = 180.7979, GNorm = 0.3693, lr_0 = 2.3647e-04
Loss = 9.6241e-03, PNorm = 180.8035, GNorm = 0.2851, lr_0 = 2.3631e-04
Loss = 7.3567e-03, PNorm = 180.8110, GNorm = 0.4173, lr_0 = 2.3615e-04
Loss = 5.8621e-03, PNorm = 180.8191, GNorm = 0.0957, lr_0 = 2.3599e-04
Loss = 6.9978e-03, PNorm = 180.8265, GNorm = 0.1297, lr_0 = 2.3582e-04
Loss = 5.5583e-03, PNorm = 180.8320, GNorm = 0.1957, lr_0 = 2.3566e-04
Loss = 4.8271e-03, PNorm = 180.8369, GNorm = 0.2491, lr_0 = 2.3550e-04
Loss = 4.2486e-03, PNorm = 180.8426, GNorm = 0.4018, lr_0 = 2.3534e-04
Loss = 6.8908e-03, PNorm = 180.8485, GNorm = 0.1764, lr_0 = 2.3518e-04
Loss = 4.9837e-03, PNorm = 180.8564, GNorm = 0.1179, lr_0 = 2.3502e-04
Loss = 7.3028e-03, PNorm = 180.8651, GNorm = 0.0943, lr_0 = 2.3486e-04
Loss = 5.6389e-03, PNorm = 180.8740, GNorm = 0.1496, lr_0 = 2.3470e-04
Loss = 7.0611e-03, PNorm = 180.8830, GNorm = 0.1162, lr_0 = 2.3454e-04
Loss = 6.3555e-03, PNorm = 180.8895, GNorm = 0.2398, lr_0 = 2.3437e-04
Loss = 9.7098e-03, PNorm = 180.8969, GNorm = 0.1372, lr_0 = 2.3421e-04
Loss = 5.5982e-03, PNorm = 180.9047, GNorm = 0.1728, lr_0 = 2.3405e-04
Loss = 6.5238e-03, PNorm = 180.9102, GNorm = 0.2750, lr_0 = 2.3389e-04
Loss = 8.0652e-03, PNorm = 180.9146, GNorm = 0.2034, lr_0 = 2.3373e-04
Loss = 1.2880e-02, PNorm = 180.9187, GNorm = 0.8708, lr_0 = 2.3357e-04
Loss = 6.1053e-03, PNorm = 180.9209, GNorm = 0.2318, lr_0 = 2.3341e-04
Loss = 6.0113e-03, PNorm = 180.9281, GNorm = 0.1557, lr_0 = 2.3325e-04
Loss = 5.4538e-03, PNorm = 180.9362, GNorm = 0.2108, lr_0 = 2.3309e-04
Loss = 6.5641e-03, PNorm = 180.9448, GNorm = 0.5495, lr_0 = 2.3293e-04
Loss = 8.6715e-03, PNorm = 180.9526, GNorm = 0.1882, lr_0 = 2.3277e-04
Loss = 6.5015e-03, PNorm = 180.9597, GNorm = 0.1012, lr_0 = 2.3261e-04
Loss = 6.7312e-03, PNorm = 180.9653, GNorm = 0.6568, lr_0 = 2.3246e-04
Loss = 7.8239e-03, PNorm = 180.9706, GNorm = 0.1877, lr_0 = 2.3230e-04
Loss = 8.6919e-03, PNorm = 180.9779, GNorm = 0.2005, lr_0 = 2.3214e-04
Loss = 6.3798e-03, PNorm = 180.9849, GNorm = 0.1991, lr_0 = 2.3198e-04
Loss = 1.1926e-02, PNorm = 180.9897, GNorm = 1.0301, lr_0 = 2.3182e-04
Loss = 5.3437e-03, PNorm = 180.9958, GNorm = 0.1727, lr_0 = 2.3166e-04
Loss = 6.1085e-03, PNorm = 181.0045, GNorm = 0.1654, lr_0 = 2.3150e-04
Loss = 4.3863e-03, PNorm = 181.0139, GNorm = 0.1027, lr_0 = 2.3134e-04
Loss = 9.6535e-03, PNorm = 181.0233, GNorm = 0.3299, lr_0 = 2.3118e-04
Loss = 8.9342e-03, PNorm = 181.0310, GNorm = 0.2831, lr_0 = 2.3103e-04
Loss = 7.2664e-03, PNorm = 181.0367, GNorm = 0.2910, lr_0 = 2.3087e-04
Loss = 1.0940e-02, PNorm = 181.0404, GNorm = 0.4375, lr_0 = 2.3071e-04
Loss = 5.5589e-03, PNorm = 181.0454, GNorm = 0.2398, lr_0 = 2.3055e-04
Loss = 7.2552e-03, PNorm = 181.0496, GNorm = 0.3267, lr_0 = 2.3039e-04
Loss = 9.4372e-03, PNorm = 181.0559, GNorm = 0.2080, lr_0 = 2.3024e-04
Loss = 4.5688e-03, PNorm = 181.0640, GNorm = 0.0945, lr_0 = 2.3008e-04
Loss = 8.6902e-03, PNorm = 181.0694, GNorm = 0.4808, lr_0 = 2.2992e-04
Loss = 5.3916e-03, PNorm = 181.0768, GNorm = 0.1855, lr_0 = 2.2976e-04
Loss = 6.5728e-03, PNorm = 181.0838, GNorm = 0.1121, lr_0 = 2.2961e-04
Loss = 6.7063e-03, PNorm = 181.0922, GNorm = 0.2912, lr_0 = 2.2945e-04
Loss = 1.1545e-02, PNorm = 181.0980, GNorm = 0.2135, lr_0 = 2.2929e-04
Loss = 3.5776e-03, PNorm = 181.1021, GNorm = 0.1168, lr_0 = 2.2913e-04
Loss = 4.8772e-03, PNorm = 181.1085, GNorm = 0.1419, lr_0 = 2.2898e-04
Loss = 5.5613e-03, PNorm = 181.1149, GNorm = 0.0798, lr_0 = 2.2882e-04
Loss = 7.2443e-03, PNorm = 181.1204, GNorm = 0.0877, lr_0 = 2.2866e-04
Loss = 6.7516e-03, PNorm = 181.1279, GNorm = 0.4852, lr_0 = 2.2851e-04
Loss = 4.7836e-03, PNorm = 181.1377, GNorm = 0.0936, lr_0 = 2.2835e-04
Loss = 1.0802e-02, PNorm = 181.1432, GNorm = 0.1949, lr_0 = 2.2819e-04
Loss = 4.7554e-03, PNorm = 181.1496, GNorm = 0.1350, lr_0 = 2.2804e-04
Loss = 5.2400e-03, PNorm = 181.1566, GNorm = 0.3637, lr_0 = 2.2788e-04
Loss = 8.1818e-03, PNorm = 181.1627, GNorm = 0.1505, lr_0 = 2.2773e-04
Loss = 4.2521e-03, PNorm = 181.1703, GNorm = 0.1084, lr_0 = 2.2757e-04
Validation mae = 0.120980
Epoch 20
Loss = 6.6593e-03, PNorm = 181.1765, GNorm = 0.1796, lr_0 = 2.2741e-04
Loss = 6.9306e-03, PNorm = 181.1826, GNorm = 0.1447, lr_0 = 2.2726e-04
Loss = 5.0596e-03, PNorm = 181.1895, GNorm = 0.2288, lr_0 = 2.2710e-04
Loss = 5.0957e-03, PNorm = 181.1955, GNorm = 0.1330, lr_0 = 2.2695e-04
Loss = 4.0660e-03, PNorm = 181.2013, GNorm = 0.0760, lr_0 = 2.2679e-04
Loss = 4.2079e-03, PNorm = 181.2048, GNorm = 0.2106, lr_0 = 2.2664e-04
Loss = 3.9463e-03, PNorm = 181.2078, GNorm = 0.0691, lr_0 = 2.2648e-04
Loss = 6.5461e-03, PNorm = 181.2109, GNorm = 0.1191, lr_0 = 2.2632e-04
Loss = 4.4135e-03, PNorm = 181.2157, GNorm = 0.2273, lr_0 = 2.2617e-04
Loss = 4.2903e-03, PNorm = 181.2206, GNorm = 0.0871, lr_0 = 2.2601e-04
Loss = 4.8428e-03, PNorm = 181.2283, GNorm = 0.2279, lr_0 = 2.2586e-04
Loss = 4.2948e-03, PNorm = 181.2349, GNorm = 0.2221, lr_0 = 2.2571e-04
Loss = 6.1197e-03, PNorm = 181.2405, GNorm = 0.1361, lr_0 = 2.2555e-04
Loss = 4.1697e-03, PNorm = 181.2417, GNorm = 0.1134, lr_0 = 2.2540e-04
Loss = 4.7434e-03, PNorm = 181.2454, GNorm = 0.3300, lr_0 = 2.2524e-04
Loss = 4.3522e-03, PNorm = 181.2469, GNorm = 0.1127, lr_0 = 2.2509e-04
Loss = 5.4836e-03, PNorm = 181.2506, GNorm = 0.1159, lr_0 = 2.2493e-04
Loss = 8.1463e-03, PNorm = 181.2534, GNorm = 0.1490, lr_0 = 2.2478e-04
Loss = 5.3954e-03, PNorm = 181.2584, GNorm = 0.1523, lr_0 = 2.2463e-04
Loss = 6.0248e-03, PNorm = 181.2637, GNorm = 0.0996, lr_0 = 2.2447e-04
Loss = 7.2657e-03, PNorm = 181.2670, GNorm = 0.2052, lr_0 = 2.2432e-04
Loss = 4.1887e-03, PNorm = 181.2734, GNorm = 0.1058, lr_0 = 2.2416e-04
Loss = 7.4453e-03, PNorm = 181.2783, GNorm = 0.0627, lr_0 = 2.2401e-04
Loss = 4.9921e-03, PNorm = 181.2837, GNorm = 0.1753, lr_0 = 2.2386e-04
Loss = 5.8648e-03, PNorm = 181.2898, GNorm = 0.2282, lr_0 = 2.2370e-04
Loss = 5.4745e-03, PNorm = 181.2948, GNorm = 0.2237, lr_0 = 2.2355e-04
Loss = 4.1330e-03, PNorm = 181.3004, GNorm = 0.0990, lr_0 = 2.2340e-04
Loss = 4.3746e-03, PNorm = 181.3046, GNorm = 0.2255, lr_0 = 2.2324e-04
Loss = 1.8139e-02, PNorm = 181.3123, GNorm = 0.3531, lr_0 = 2.2309e-04
Loss = 7.3702e-03, PNorm = 181.3193, GNorm = 0.1379, lr_0 = 2.2294e-04
Loss = 6.5947e-03, PNorm = 181.3243, GNorm = 0.4058, lr_0 = 2.2279e-04
Loss = 5.1044e-03, PNorm = 181.3285, GNorm = 0.0950, lr_0 = 2.2263e-04
Loss = 5.9918e-03, PNorm = 181.3332, GNorm = 0.1449, lr_0 = 2.2248e-04
Loss = 4.8434e-03, PNorm = 181.3388, GNorm = 0.2049, lr_0 = 2.2233e-04
Loss = 3.6578e-03, PNorm = 181.3456, GNorm = 0.1110, lr_0 = 2.2218e-04
Loss = 1.4177e-02, PNorm = 181.3460, GNorm = 0.2376, lr_0 = 2.2202e-04
Loss = 6.5750e-03, PNorm = 181.3513, GNorm = 0.1388, lr_0 = 2.2187e-04
Loss = 5.4841e-03, PNorm = 181.3559, GNorm = 0.3323, lr_0 = 2.2172e-04
Loss = 4.8469e-03, PNorm = 181.3597, GNorm = 0.1769, lr_0 = 2.2157e-04
Loss = 4.5923e-03, PNorm = 181.3660, GNorm = 0.3310, lr_0 = 2.2142e-04
Loss = 3.8106e-03, PNorm = 181.3731, GNorm = 0.1204, lr_0 = 2.2126e-04
Loss = 7.8208e-03, PNorm = 181.3757, GNorm = 0.3311, lr_0 = 2.2111e-04
Loss = 4.5096e-03, PNorm = 181.3804, GNorm = 0.1691, lr_0 = 2.2096e-04
Loss = 7.8079e-03, PNorm = 181.3858, GNorm = 0.2958, lr_0 = 2.2081e-04
Loss = 4.4893e-03, PNorm = 181.3893, GNorm = 0.1751, lr_0 = 2.2066e-04
Loss = 5.9065e-03, PNorm = 181.3937, GNorm = 0.2473, lr_0 = 2.2051e-04
Loss = 7.9513e-03, PNorm = 181.4001, GNorm = 0.1902, lr_0 = 2.2036e-04
Loss = 5.5500e-03, PNorm = 181.4069, GNorm = 0.1571, lr_0 = 2.2021e-04
Loss = 6.7740e-03, PNorm = 181.4119, GNorm = 0.1744, lr_0 = 2.2005e-04
Loss = 7.0384e-03, PNorm = 181.4163, GNorm = 0.1721, lr_0 = 2.1990e-04
Loss = 8.4724e-03, PNorm = 181.4236, GNorm = 0.1609, lr_0 = 2.1975e-04
Loss = 8.1260e-03, PNorm = 181.4299, GNorm = 0.1518, lr_0 = 2.1960e-04
Loss = 5.8150e-03, PNorm = 181.4341, GNorm = 0.2756, lr_0 = 2.1945e-04
Loss = 1.0621e-02, PNorm = 181.4393, GNorm = 0.1330, lr_0 = 2.1930e-04
Loss = 4.7672e-03, PNorm = 181.4437, GNorm = 0.1270, lr_0 = 2.1915e-04
Loss = 4.9287e-03, PNorm = 181.4482, GNorm = 0.1170, lr_0 = 2.1900e-04
Loss = 4.8900e-03, PNorm = 181.4537, GNorm = 0.1223, lr_0 = 2.1885e-04
Loss = 3.8385e-03, PNorm = 181.4602, GNorm = 0.2371, lr_0 = 2.1870e-04
Loss = 5.4811e-03, PNorm = 181.4665, GNorm = 0.2217, lr_0 = 2.1855e-04
Loss = 5.5769e-03, PNorm = 181.4714, GNorm = 0.1561, lr_0 = 2.1840e-04
Loss = 4.7288e-03, PNorm = 181.4769, GNorm = 0.1088, lr_0 = 2.1825e-04
Loss = 4.2123e-03, PNorm = 181.4827, GNorm = 0.0975, lr_0 = 2.1810e-04
Loss = 1.0591e-02, PNorm = 181.4875, GNorm = 0.7475, lr_0 = 2.1795e-04
Loss = 8.0098e-03, PNorm = 181.4929, GNorm = 0.0616, lr_0 = 2.1780e-04
Loss = 5.0737e-03, PNorm = 181.4966, GNorm = 0.2319, lr_0 = 2.1765e-04
Loss = 7.6250e-03, PNorm = 181.5017, GNorm = 0.1123, lr_0 = 2.1751e-04
Loss = 5.2955e-03, PNorm = 181.5059, GNorm = 0.1871, lr_0 = 2.1736e-04
Loss = 9.1913e-03, PNorm = 181.5112, GNorm = 0.3634, lr_0 = 2.1721e-04
Loss = 6.8874e-03, PNorm = 181.5173, GNorm = 0.2874, lr_0 = 2.1706e-04
Loss = 5.6436e-03, PNorm = 181.5249, GNorm = 0.1612, lr_0 = 2.1691e-04
Loss = 7.4018e-03, PNorm = 181.5305, GNorm = 0.1421, lr_0 = 2.1676e-04
Loss = 4.4651e-03, PNorm = 181.5356, GNorm = 0.0776, lr_0 = 2.1661e-04
Loss = 1.1649e-02, PNorm = 181.5400, GNorm = 0.1310, lr_0 = 2.1646e-04
Loss = 4.1315e-03, PNorm = 181.5450, GNorm = 0.1278, lr_0 = 2.1632e-04
Loss = 8.5722e-03, PNorm = 181.5496, GNorm = 0.1759, lr_0 = 2.1617e-04
Loss = 4.5902e-03, PNorm = 181.5539, GNorm = 0.1213, lr_0 = 2.1602e-04
Loss = 5.4081e-03, PNorm = 181.5598, GNorm = 0.1991, lr_0 = 2.1587e-04
Loss = 4.5069e-03, PNorm = 181.5637, GNorm = 0.1703, lr_0 = 2.1572e-04
Loss = 7.9919e-03, PNorm = 181.5689, GNorm = 0.2118, lr_0 = 2.1558e-04
Loss = 3.8534e-03, PNorm = 181.5754, GNorm = 0.0826, lr_0 = 2.1543e-04
Loss = 5.7640e-03, PNorm = 181.5824, GNorm = 0.0775, lr_0 = 2.1528e-04
Loss = 5.4798e-03, PNorm = 181.5889, GNorm = 0.1041, lr_0 = 2.1513e-04
Loss = 6.6151e-03, PNorm = 181.5921, GNorm = 0.3485, lr_0 = 2.1499e-04
Loss = 6.8362e-03, PNorm = 181.5982, GNorm = 0.2159, lr_0 = 2.1484e-04
Loss = 6.9907e-03, PNorm = 181.6033, GNorm = 0.0767, lr_0 = 2.1469e-04
Loss = 9.6705e-03, PNorm = 181.6086, GNorm = 0.3124, lr_0 = 2.1454e-04
Loss = 1.1280e-02, PNorm = 181.6108, GNorm = 0.3898, lr_0 = 2.1440e-04
Loss = 8.2598e-03, PNorm = 181.6180, GNorm = 0.1874, lr_0 = 2.1425e-04
Loss = 4.8444e-03, PNorm = 181.6240, GNorm = 0.2611, lr_0 = 2.1410e-04
Loss = 6.1571e-03, PNorm = 181.6298, GNorm = 0.1826, lr_0 = 2.1396e-04
Loss = 1.1190e-02, PNorm = 181.6367, GNorm = 0.1903, lr_0 = 2.1381e-04
Loss = 4.6909e-03, PNorm = 181.6402, GNorm = 0.2376, lr_0 = 2.1366e-04
Loss = 5.5955e-03, PNorm = 181.6484, GNorm = 0.4157, lr_0 = 2.1352e-04
Loss = 5.8412e-03, PNorm = 181.6575, GNorm = 0.5748, lr_0 = 2.1337e-04
Loss = 4.3958e-03, PNorm = 181.6684, GNorm = 0.1646, lr_0 = 2.1323e-04
Loss = 5.4039e-03, PNorm = 181.6746, GNorm = 0.1782, lr_0 = 2.1308e-04
Loss = 6.2678e-03, PNorm = 181.6790, GNorm = 0.0970, lr_0 = 2.1293e-04
Loss = 4.0435e-03, PNorm = 181.6830, GNorm = 0.1544, lr_0 = 2.1279e-04
Loss = 6.7155e-03, PNorm = 181.6871, GNorm = 0.2202, lr_0 = 2.1264e-04
Loss = 5.0455e-03, PNorm = 181.6923, GNorm = 0.2010, lr_0 = 2.1250e-04
Loss = 8.5352e-03, PNorm = 181.7003, GNorm = 0.1346, lr_0 = 2.1235e-04
Loss = 1.0330e-02, PNorm = 181.7071, GNorm = 0.3976, lr_0 = 2.1221e-04
Loss = 1.0062e-02, PNorm = 181.7143, GNorm = 0.1751, lr_0 = 2.1206e-04
Loss = 5.3849e-03, PNorm = 181.7204, GNorm = 0.0879, lr_0 = 2.1191e-04
Loss = 9.4176e-03, PNorm = 181.7263, GNorm = 0.5860, lr_0 = 2.1177e-04
Loss = 1.1895e-02, PNorm = 181.7345, GNorm = 0.1703, lr_0 = 2.1162e-04
Loss = 5.8448e-03, PNorm = 181.7407, GNorm = 0.1422, lr_0 = 2.1148e-04
Loss = 4.9324e-03, PNorm = 181.7466, GNorm = 0.2027, lr_0 = 2.1133e-04
Loss = 5.8684e-03, PNorm = 181.7525, GNorm = 0.1003, lr_0 = 2.1119e-04
Loss = 1.0931e-02, PNorm = 181.7604, GNorm = 0.1388, lr_0 = 2.1104e-04
Loss = 8.8124e-03, PNorm = 181.7674, GNorm = 0.2369, lr_0 = 2.1090e-04
Loss = 4.7611e-03, PNorm = 181.7729, GNorm = 0.2452, lr_0 = 2.1076e-04
Loss = 8.7590e-03, PNorm = 181.7772, GNorm = 0.4178, lr_0 = 2.1061e-04
Loss = 5.1349e-03, PNorm = 181.7822, GNorm = 0.0938, lr_0 = 2.1047e-04
Loss = 5.6314e-03, PNorm = 181.7881, GNorm = 0.1488, lr_0 = 2.1032e-04
Loss = 8.3956e-03, PNorm = 181.7946, GNorm = 0.2807, lr_0 = 2.1018e-04
Loss = 4.9512e-03, PNorm = 181.8012, GNorm = 0.1374, lr_0 = 2.1003e-04
Loss = 4.2258e-03, PNorm = 181.8087, GNorm = 0.1485, lr_0 = 2.0989e-04
Loss = 1.0212e-02, PNorm = 181.8145, GNorm = 0.1655, lr_0 = 2.0975e-04
Loss = 1.1790e-02, PNorm = 181.8201, GNorm = 0.1522, lr_0 = 2.0960e-04
Validation mae = 0.120990
Epoch 21
Loss = 8.1340e-03, PNorm = 181.8242, GNorm = 0.2240, lr_0 = 2.0946e-04
Loss = 5.4059e-03, PNorm = 181.8295, GNorm = 0.0979, lr_0 = 2.0932e-04
Loss = 5.0164e-03, PNorm = 181.8340, GNorm = 0.1823, lr_0 = 2.0917e-04
Loss = 5.8251e-03, PNorm = 181.8373, GNorm = 0.0673, lr_0 = 2.0903e-04
Loss = 1.4984e-02, PNorm = 181.8433, GNorm = 0.2146, lr_0 = 2.0889e-04
Loss = 5.0796e-03, PNorm = 181.8445, GNorm = 0.1366, lr_0 = 2.0874e-04
Loss = 3.7494e-03, PNorm = 181.8488, GNorm = 0.2424, lr_0 = 2.0860e-04
Loss = 6.1637e-03, PNorm = 181.8510, GNorm = 0.3830, lr_0 = 2.0846e-04
Loss = 3.9838e-03, PNorm = 181.8543, GNorm = 0.1857, lr_0 = 2.0831e-04
Loss = 6.2640e-03, PNorm = 181.8603, GNorm = 0.1055, lr_0 = 2.0817e-04
Loss = 5.5229e-03, PNorm = 181.8646, GNorm = 0.2112, lr_0 = 2.0803e-04
Loss = 3.9956e-03, PNorm = 181.8704, GNorm = 0.0931, lr_0 = 2.0789e-04
Loss = 4.0799e-03, PNorm = 181.8752, GNorm = 0.1557, lr_0 = 2.0774e-04
Loss = 4.7760e-03, PNorm = 181.8782, GNorm = 0.1079, lr_0 = 2.0760e-04
Loss = 5.2651e-03, PNorm = 181.8806, GNorm = 0.0822, lr_0 = 2.0746e-04
Loss = 6.0813e-03, PNorm = 181.8851, GNorm = 0.1429, lr_0 = 2.0732e-04
Loss = 4.7921e-03, PNorm = 181.8901, GNorm = 0.1212, lr_0 = 2.0718e-04
Loss = 4.3792e-03, PNorm = 181.8938, GNorm = 0.1819, lr_0 = 2.0703e-04
Loss = 7.7997e-03, PNorm = 181.8998, GNorm = 0.1797, lr_0 = 2.0689e-04
Loss = 6.7852e-03, PNorm = 181.9035, GNorm = 0.1376, lr_0 = 2.0675e-04
Loss = 7.6860e-03, PNorm = 181.9067, GNorm = 0.8595, lr_0 = 2.0661e-04
Loss = 7.0818e-03, PNorm = 181.9122, GNorm = 0.2146, lr_0 = 2.0647e-04
Loss = 3.1375e-03, PNorm = 181.9169, GNorm = 0.1014, lr_0 = 2.0633e-04
Loss = 5.4651e-03, PNorm = 181.9194, GNorm = 0.1357, lr_0 = 2.0618e-04
Loss = 3.7931e-03, PNorm = 181.9254, GNorm = 0.2542, lr_0 = 2.0604e-04
Loss = 3.0887e-03, PNorm = 181.9303, GNorm = 0.1450, lr_0 = 2.0590e-04
Loss = 4.1305e-03, PNorm = 181.9353, GNorm = 0.1320, lr_0 = 2.0576e-04
Loss = 4.1628e-03, PNorm = 181.9399, GNorm = 0.1774, lr_0 = 2.0562e-04
Loss = 7.3958e-03, PNorm = 181.9467, GNorm = 0.1344, lr_0 = 2.0548e-04
Loss = 5.6819e-03, PNorm = 181.9508, GNorm = 0.0809, lr_0 = 2.0534e-04
Loss = 5.4891e-03, PNorm = 181.9559, GNorm = 0.2938, lr_0 = 2.0520e-04
Loss = 3.7543e-03, PNorm = 181.9609, GNorm = 0.1434, lr_0 = 2.0506e-04
Loss = 4.5127e-03, PNorm = 181.9654, GNorm = 0.1473, lr_0 = 2.0492e-04
Loss = 1.0285e-02, PNorm = 181.9700, GNorm = 0.1549, lr_0 = 2.0478e-04
Loss = 4.0594e-03, PNorm = 181.9748, GNorm = 0.1730, lr_0 = 2.0464e-04
Loss = 4.2468e-03, PNorm = 181.9785, GNorm = 0.1052, lr_0 = 2.0450e-04
Loss = 6.9778e-03, PNorm = 181.9821, GNorm = 0.2394, lr_0 = 2.0436e-04
Loss = 5.6063e-03, PNorm = 181.9873, GNorm = 0.2167, lr_0 = 2.0422e-04
Loss = 7.8046e-03, PNorm = 181.9906, GNorm = 0.1333, lr_0 = 2.0408e-04
Loss = 5.2384e-03, PNorm = 181.9946, GNorm = 0.1474, lr_0 = 2.0394e-04
Loss = 8.9639e-03, PNorm = 181.9998, GNorm = 0.2071, lr_0 = 2.0380e-04
Loss = 4.0189e-03, PNorm = 182.0057, GNorm = 0.0684, lr_0 = 2.0366e-04
Loss = 6.7374e-03, PNorm = 182.0110, GNorm = 0.1253, lr_0 = 2.0352e-04
Loss = 6.9800e-03, PNorm = 182.0155, GNorm = 0.0770, lr_0 = 2.0338e-04
Loss = 5.4675e-03, PNorm = 182.0197, GNorm = 0.0694, lr_0 = 2.0324e-04
Loss = 8.9343e-03, PNorm = 182.0221, GNorm = 0.2813, lr_0 = 2.0310e-04
Loss = 4.8793e-03, PNorm = 182.0262, GNorm = 0.2174, lr_0 = 2.0296e-04
Loss = 5.0640e-03, PNorm = 182.0288, GNorm = 0.1575, lr_0 = 2.0282e-04
Loss = 4.5830e-03, PNorm = 182.0330, GNorm = 0.1163, lr_0 = 2.0268e-04
Loss = 5.5791e-03, PNorm = 182.0371, GNorm = 0.6672, lr_0 = 2.0254e-04
Loss = 5.1546e-03, PNorm = 182.0409, GNorm = 0.2911, lr_0 = 2.0240e-04
Loss = 3.9234e-03, PNorm = 182.0441, GNorm = 0.0756, lr_0 = 2.0227e-04
Loss = 3.9233e-03, PNorm = 182.0477, GNorm = 0.1446, lr_0 = 2.0213e-04
Loss = 5.8340e-03, PNorm = 182.0531, GNorm = 0.0864, lr_0 = 2.0199e-04
Loss = 8.3959e-03, PNorm = 182.0578, GNorm = 0.1856, lr_0 = 2.0185e-04
Loss = 3.7476e-03, PNorm = 182.0628, GNorm = 0.2324, lr_0 = 2.0171e-04
Loss = 3.9058e-03, PNorm = 182.0666, GNorm = 0.1616, lr_0 = 2.0157e-04
Loss = 1.0035e-02, PNorm = 182.0706, GNorm = 0.1935, lr_0 = 2.0144e-04
Loss = 4.2489e-03, PNorm = 182.0760, GNorm = 0.1390, lr_0 = 2.0130e-04
Loss = 5.9980e-03, PNorm = 182.0807, GNorm = 0.1892, lr_0 = 2.0116e-04
Loss = 4.7291e-03, PNorm = 182.0868, GNorm = 0.1227, lr_0 = 2.0102e-04
Loss = 8.0529e-03, PNorm = 182.0913, GNorm = 0.0704, lr_0 = 2.0088e-04
Loss = 1.0426e-02, PNorm = 182.0966, GNorm = 0.5466, lr_0 = 2.0075e-04
Loss = 5.8396e-03, PNorm = 182.1021, GNorm = 0.3141, lr_0 = 2.0061e-04
Loss = 5.8566e-03, PNorm = 182.1079, GNorm = 0.2824, lr_0 = 2.0047e-04
Loss = 6.2995e-03, PNorm = 182.1119, GNorm = 0.0910, lr_0 = 2.0033e-04
Loss = 4.5619e-03, PNorm = 182.1162, GNorm = 0.2407, lr_0 = 2.0020e-04
Loss = 6.2506e-03, PNorm = 182.1218, GNorm = 0.3275, lr_0 = 2.0006e-04
Loss = 3.9187e-03, PNorm = 182.1261, GNorm = 0.1132, lr_0 = 1.9992e-04
Loss = 3.4971e-03, PNorm = 182.1308, GNorm = 0.1552, lr_0 = 1.9979e-04
Loss = 6.6806e-03, PNorm = 182.1360, GNorm = 0.2188, lr_0 = 1.9965e-04
Loss = 3.6854e-03, PNorm = 182.1407, GNorm = 0.1244, lr_0 = 1.9951e-04
Loss = 6.1973e-03, PNorm = 182.1466, GNorm = 0.1045, lr_0 = 1.9938e-04
Loss = 4.6975e-03, PNorm = 182.1515, GNorm = 0.1174, lr_0 = 1.9924e-04
Loss = 3.7248e-03, PNorm = 182.1568, GNorm = 0.1705, lr_0 = 1.9910e-04
Loss = 4.1259e-03, PNorm = 182.1612, GNorm = 0.1912, lr_0 = 1.9897e-04
Loss = 3.5398e-03, PNorm = 182.1666, GNorm = 0.1937, lr_0 = 1.9883e-04
Loss = 3.7291e-03, PNorm = 182.1713, GNorm = 0.0772, lr_0 = 1.9869e-04
Loss = 4.9169e-03, PNorm = 182.1740, GNorm = 0.1123, lr_0 = 1.9856e-04
Loss = 4.4445e-03, PNorm = 182.1768, GNorm = 0.1017, lr_0 = 1.9842e-04
Loss = 3.3775e-03, PNorm = 182.1805, GNorm = 0.2327, lr_0 = 1.9829e-04
Loss = 3.8663e-03, PNorm = 182.1830, GNorm = 0.0763, lr_0 = 1.9815e-04
Loss = 3.1893e-03, PNorm = 182.1871, GNorm = 0.1845, lr_0 = 1.9801e-04
Loss = 5.1015e-03, PNorm = 182.1926, GNorm = 0.5342, lr_0 = 1.9788e-04
Loss = 8.5431e-03, PNorm = 182.1989, GNorm = 0.1000, lr_0 = 1.9774e-04
Loss = 1.1241e-02, PNorm = 182.2034, GNorm = 0.0930, lr_0 = 1.9761e-04
Loss = 3.9786e-03, PNorm = 182.2092, GNorm = 0.1346, lr_0 = 1.9747e-04
Loss = 3.8924e-03, PNorm = 182.2131, GNorm = 0.1661, lr_0 = 1.9734e-04
Loss = 5.0293e-03, PNorm = 182.2196, GNorm = 0.1778, lr_0 = 1.9720e-04
Loss = 5.6351e-03, PNorm = 182.2241, GNorm = 0.0866, lr_0 = 1.9707e-04
Loss = 6.3214e-03, PNorm = 182.2305, GNorm = 0.1732, lr_0 = 1.9693e-04
Loss = 3.5355e-03, PNorm = 182.2348, GNorm = 0.1001, lr_0 = 1.9680e-04
Loss = 1.1384e-02, PNorm = 182.2384, GNorm = 0.1137, lr_0 = 1.9666e-04
Loss = 6.6074e-03, PNorm = 182.2427, GNorm = 0.3270, lr_0 = 1.9653e-04
Loss = 3.2833e-03, PNorm = 182.2468, GNorm = 0.0840, lr_0 = 1.9639e-04
Loss = 3.2463e-03, PNorm = 182.2514, GNorm = 0.0723, lr_0 = 1.9626e-04
Loss = 9.0397e-03, PNorm = 182.2550, GNorm = 0.1245, lr_0 = 1.9612e-04
Loss = 4.4845e-03, PNorm = 182.2601, GNorm = 0.2113, lr_0 = 1.9599e-04
Loss = 7.3422e-03, PNorm = 182.2669, GNorm = 0.2683, lr_0 = 1.9585e-04
Loss = 7.4342e-03, PNorm = 182.2722, GNorm = 0.3088, lr_0 = 1.9572e-04
Loss = 9.4658e-03, PNorm = 182.2746, GNorm = 0.9875, lr_0 = 1.9559e-04
Loss = 7.5794e-03, PNorm = 182.2763, GNorm = 0.1597, lr_0 = 1.9545e-04
Loss = 6.8148e-03, PNorm = 182.2807, GNorm = 0.1590, lr_0 = 1.9532e-04
Loss = 4.2237e-03, PNorm = 182.2853, GNorm = 0.1069, lr_0 = 1.9518e-04
Loss = 3.3556e-03, PNorm = 182.2915, GNorm = 0.1704, lr_0 = 1.9505e-04
Loss = 1.1643e-02, PNorm = 182.2962, GNorm = 0.1819, lr_0 = 1.9492e-04
Loss = 5.1666e-03, PNorm = 182.3013, GNorm = 0.1882, lr_0 = 1.9478e-04
Loss = 1.2409e-02, PNorm = 182.3058, GNorm = 0.8630, lr_0 = 1.9465e-04
Loss = 5.6573e-03, PNorm = 182.3085, GNorm = 0.1779, lr_0 = 1.9452e-04
Loss = 3.2469e-03, PNorm = 182.3132, GNorm = 0.1008, lr_0 = 1.9438e-04
Loss = 3.6084e-03, PNorm = 182.3193, GNorm = 0.1802, lr_0 = 1.9425e-04
Loss = 4.7944e-03, PNorm = 182.3230, GNorm = 0.1389, lr_0 = 1.9412e-04
Loss = 9.4587e-03, PNorm = 182.3281, GNorm = 0.6381, lr_0 = 1.9398e-04
Loss = 4.2281e-03, PNorm = 182.3326, GNorm = 0.1177, lr_0 = 1.9385e-04
Loss = 4.2837e-03, PNorm = 182.3396, GNorm = 0.1006, lr_0 = 1.9372e-04
Loss = 5.2806e-03, PNorm = 182.3458, GNorm = 0.2761, lr_0 = 1.9359e-04
Loss = 4.9933e-03, PNorm = 182.3511, GNorm = 0.3096, lr_0 = 1.9345e-04
Loss = 4.3212e-03, PNorm = 182.3565, GNorm = 0.2068, lr_0 = 1.9332e-04
Loss = 4.2908e-03, PNorm = 182.3616, GNorm = 0.1330, lr_0 = 1.9319e-04
Loss = 1.1428e-02, PNorm = 182.3663, GNorm = 0.1642, lr_0 = 1.9306e-04
Validation mae = 0.120836
Epoch 22
Loss = 5.5892e-03, PNorm = 182.3709, GNorm = 0.3658, lr_0 = 1.9292e-04
Loss = 3.7456e-03, PNorm = 182.3754, GNorm = 0.1225, lr_0 = 1.9279e-04
Loss = 6.2554e-03, PNorm = 182.3809, GNorm = 0.2771, lr_0 = 1.9266e-04
Loss = 4.0230e-03, PNorm = 182.3847, GNorm = 0.1351, lr_0 = 1.9253e-04
Loss = 4.6542e-03, PNorm = 182.3869, GNorm = 0.1510, lr_0 = 1.9240e-04
Loss = 7.3114e-03, PNorm = 182.3890, GNorm = 0.1904, lr_0 = 1.9226e-04
Loss = 3.0997e-03, PNorm = 182.3896, GNorm = 0.1166, lr_0 = 1.9213e-04
Loss = 4.8700e-03, PNorm = 182.3918, GNorm = 0.0736, lr_0 = 1.9200e-04
Loss = 1.0940e-02, PNorm = 182.3953, GNorm = 0.0987, lr_0 = 1.9187e-04
Loss = 7.1418e-03, PNorm = 182.3993, GNorm = 0.4455, lr_0 = 1.9174e-04
Loss = 4.3914e-03, PNorm = 182.4040, GNorm = 0.0968, lr_0 = 1.9161e-04
Loss = 7.1278e-03, PNorm = 182.4084, GNorm = 0.1398, lr_0 = 1.9148e-04
Loss = 3.3352e-03, PNorm = 182.4119, GNorm = 0.2182, lr_0 = 1.9134e-04
Loss = 6.4018e-03, PNorm = 182.4167, GNorm = 0.1248, lr_0 = 1.9121e-04
Loss = 2.9879e-03, PNorm = 182.4212, GNorm = 0.0698, lr_0 = 1.9108e-04
Loss = 8.8862e-03, PNorm = 182.4246, GNorm = 0.1565, lr_0 = 1.9095e-04
Loss = 3.5311e-03, PNorm = 182.4272, GNorm = 0.1333, lr_0 = 1.9082e-04
Loss = 4.3528e-03, PNorm = 182.4309, GNorm = 0.2115, lr_0 = 1.9069e-04
Loss = 3.2079e-03, PNorm = 182.4353, GNorm = 0.1240, lr_0 = 1.9056e-04
Loss = 3.0855e-03, PNorm = 182.4383, GNorm = 0.1369, lr_0 = 1.9043e-04
Loss = 6.4123e-03, PNorm = 182.4400, GNorm = 0.1420, lr_0 = 1.9030e-04
Loss = 3.0871e-03, PNorm = 182.4427, GNorm = 0.2392, lr_0 = 1.9017e-04
Loss = 4.5900e-03, PNorm = 182.4440, GNorm = 0.1393, lr_0 = 1.9004e-04
Loss = 3.7283e-03, PNorm = 182.4458, GNorm = 0.0760, lr_0 = 1.8991e-04
Loss = 2.5080e-03, PNorm = 182.4490, GNorm = 0.1825, lr_0 = 1.8978e-04
Loss = 4.4453e-03, PNorm = 182.4510, GNorm = 0.2030, lr_0 = 1.8965e-04
Loss = 3.7712e-03, PNorm = 182.4530, GNorm = 0.0795, lr_0 = 1.8952e-04
Loss = 6.6320e-03, PNorm = 182.4580, GNorm = 0.1699, lr_0 = 1.8939e-04
Loss = 7.5244e-03, PNorm = 182.4601, GNorm = 0.5320, lr_0 = 1.8926e-04
Loss = 6.2386e-03, PNorm = 182.4635, GNorm = 0.1329, lr_0 = 1.8913e-04
Loss = 8.0451e-03, PNorm = 182.4675, GNorm = 0.0800, lr_0 = 1.8900e-04
Loss = 8.0951e-03, PNorm = 182.4705, GNorm = 0.1769, lr_0 = 1.8887e-04
Loss = 5.0670e-03, PNorm = 182.4734, GNorm = 0.1089, lr_0 = 1.8874e-04
Loss = 3.3528e-03, PNorm = 182.4781, GNorm = 0.1399, lr_0 = 1.8861e-04
Loss = 3.9159e-03, PNorm = 182.4820, GNorm = 0.1285, lr_0 = 1.8848e-04
Loss = 4.7288e-03, PNorm = 182.4857, GNorm = 0.2000, lr_0 = 1.8835e-04
Loss = 5.4682e-03, PNorm = 182.4905, GNorm = 0.2035, lr_0 = 1.8822e-04
Loss = 3.4855e-03, PNorm = 182.4940, GNorm = 0.0970, lr_0 = 1.8809e-04
Loss = 3.1363e-03, PNorm = 182.4967, GNorm = 0.1045, lr_0 = 1.8797e-04
Loss = 7.2394e-03, PNorm = 182.4997, GNorm = 0.1226, lr_0 = 1.8784e-04
Loss = 3.0255e-03, PNorm = 182.5038, GNorm = 0.1687, lr_0 = 1.8771e-04
Loss = 6.1932e-03, PNorm = 182.5082, GNorm = 0.2348, lr_0 = 1.8758e-04
Loss = 5.1333e-03, PNorm = 182.5102, GNorm = 0.1245, lr_0 = 1.8745e-04
Loss = 6.2184e-03, PNorm = 182.5131, GNorm = 0.3144, lr_0 = 1.8732e-04
Loss = 3.4533e-03, PNorm = 182.5153, GNorm = 0.1491, lr_0 = 1.8719e-04
Loss = 3.1527e-03, PNorm = 182.5188, GNorm = 0.1517, lr_0 = 1.8707e-04
Loss = 4.5934e-03, PNorm = 182.5241, GNorm = 0.1362, lr_0 = 1.8694e-04
Loss = 3.7594e-03, PNorm = 182.5273, GNorm = 0.2685, lr_0 = 1.8681e-04
Loss = 3.9906e-03, PNorm = 182.5296, GNorm = 0.1477, lr_0 = 1.8668e-04
Loss = 4.5133e-03, PNorm = 182.5348, GNorm = 0.1296, lr_0 = 1.8655e-04
Loss = 3.2688e-03, PNorm = 182.5410, GNorm = 0.1209, lr_0 = 1.8643e-04
Loss = 5.5658e-03, PNorm = 182.5442, GNorm = 0.0996, lr_0 = 1.8630e-04
Loss = 4.7702e-03, PNorm = 182.5465, GNorm = 0.0655, lr_0 = 1.8617e-04
Loss = 8.4632e-03, PNorm = 182.5493, GNorm = 0.1039, lr_0 = 1.8604e-04
Loss = 3.9824e-03, PNorm = 182.5525, GNorm = 0.2356, lr_0 = 1.8592e-04
Loss = 6.5195e-03, PNorm = 182.5558, GNorm = 0.1477, lr_0 = 1.8579e-04
Loss = 4.3515e-03, PNorm = 182.5611, GNorm = 0.1479, lr_0 = 1.8566e-04
Loss = 2.7174e-03, PNorm = 182.5642, GNorm = 0.2012, lr_0 = 1.8553e-04
Loss = 2.8889e-03, PNorm = 182.5676, GNorm = 0.1846, lr_0 = 1.8541e-04
Loss = 3.8418e-03, PNorm = 182.5702, GNorm = 0.1033, lr_0 = 1.8528e-04
Loss = 3.1169e-03, PNorm = 182.5736, GNorm = 0.1523, lr_0 = 1.8515e-04
Loss = 6.0544e-03, PNorm = 182.5754, GNorm = 0.1239, lr_0 = 1.8503e-04
Loss = 3.7123e-03, PNorm = 182.5776, GNorm = 0.1568, lr_0 = 1.8490e-04
Loss = 6.6490e-03, PNorm = 182.5811, GNorm = 0.1637, lr_0 = 1.8477e-04
Loss = 7.1558e-03, PNorm = 182.5856, GNorm = 0.1892, lr_0 = 1.8465e-04
Loss = 3.0460e-03, PNorm = 182.5902, GNorm = 0.0866, lr_0 = 1.8452e-04
Loss = 3.2856e-03, PNorm = 182.5953, GNorm = 0.1822, lr_0 = 1.8439e-04
Loss = 4.2758e-03, PNorm = 182.5980, GNorm = 0.0795, lr_0 = 1.8427e-04
Loss = 7.3098e-03, PNorm = 182.6022, GNorm = 0.1546, lr_0 = 1.8414e-04
Loss = 6.8706e-03, PNorm = 182.6071, GNorm = 0.1769, lr_0 = 1.8401e-04
Loss = 3.0922e-03, PNorm = 182.6091, GNorm = 0.3035, lr_0 = 1.8389e-04
Loss = 4.0931e-03, PNorm = 182.6122, GNorm = 0.1254, lr_0 = 1.8376e-04
Loss = 5.2029e-03, PNorm = 182.6132, GNorm = 0.2786, lr_0 = 1.8364e-04
Loss = 6.1386e-03, PNorm = 182.6155, GNorm = 0.0882, lr_0 = 1.8351e-04
Loss = 3.5546e-03, PNorm = 182.6192, GNorm = 0.2166, lr_0 = 1.8338e-04
Loss = 6.1884e-03, PNorm = 182.6242, GNorm = 0.2008, lr_0 = 1.8326e-04
Loss = 2.9954e-03, PNorm = 182.6273, GNorm = 0.1169, lr_0 = 1.8313e-04
Loss = 7.3440e-03, PNorm = 182.6308, GNorm = 0.6653, lr_0 = 1.8301e-04
Loss = 2.9243e-03, PNorm = 182.6330, GNorm = 0.0723, lr_0 = 1.8288e-04
Loss = 7.6235e-03, PNorm = 182.6352, GNorm = 1.1688, lr_0 = 1.8276e-04
Loss = 4.0130e-03, PNorm = 182.6401, GNorm = 0.1261, lr_0 = 1.8263e-04
Loss = 3.6110e-03, PNorm = 182.6456, GNorm = 0.1225, lr_0 = 1.8251e-04
Loss = 8.5610e-03, PNorm = 182.6502, GNorm = 0.1715, lr_0 = 1.8238e-04
Loss = 4.6159e-03, PNorm = 182.6551, GNorm = 0.1333, lr_0 = 1.8226e-04
Loss = 3.4668e-03, PNorm = 182.6598, GNorm = 0.1551, lr_0 = 1.8213e-04
Loss = 2.7950e-03, PNorm = 182.6651, GNorm = 0.1344, lr_0 = 1.8201e-04
Loss = 3.5056e-03, PNorm = 182.6687, GNorm = 0.1366, lr_0 = 1.8188e-04
Loss = 2.9013e-03, PNorm = 182.6739, GNorm = 0.1304, lr_0 = 1.8176e-04
Loss = 7.0311e-03, PNorm = 182.6765, GNorm = 0.0882, lr_0 = 1.8163e-04
Loss = 5.7453e-03, PNorm = 182.6788, GNorm = 0.1346, lr_0 = 1.8151e-04
Loss = 4.4483e-03, PNorm = 182.6824, GNorm = 0.1170, lr_0 = 1.8138e-04
Loss = 5.2907e-03, PNorm = 182.6860, GNorm = 0.2105, lr_0 = 1.8126e-04
Loss = 3.5736e-03, PNorm = 182.6894, GNorm = 0.0734, lr_0 = 1.8114e-04
Loss = 2.9577e-03, PNorm = 182.6917, GNorm = 0.1003, lr_0 = 1.8101e-04
Loss = 9.0881e-03, PNorm = 182.6931, GNorm = 1.0631, lr_0 = 1.8089e-04
Loss = 2.7584e-03, PNorm = 182.6955, GNorm = 0.0505, lr_0 = 1.8076e-04
Loss = 5.4929e-03, PNorm = 182.6987, GNorm = 0.1380, lr_0 = 1.8064e-04
Loss = 2.2144e-02, PNorm = 182.7038, GNorm = 3.0871, lr_0 = 1.8052e-04
Loss = 7.0882e-03, PNorm = 182.7102, GNorm = 0.1877, lr_0 = 1.8039e-04
Loss = 3.8983e-03, PNorm = 182.7130, GNorm = 0.1100, lr_0 = 1.8027e-04
Loss = 4.5450e-03, PNorm = 182.7166, GNorm = 0.2091, lr_0 = 1.8015e-04
Loss = 6.1430e-03, PNorm = 182.7226, GNorm = 0.2014, lr_0 = 1.8002e-04
Loss = 4.8317e-03, PNorm = 182.7285, GNorm = 0.1026, lr_0 = 1.7990e-04
Loss = 6.9038e-03, PNorm = 182.7337, GNorm = 0.2910, lr_0 = 1.7978e-04
Loss = 2.7623e-03, PNorm = 182.7365, GNorm = 0.3827, lr_0 = 1.7965e-04
Loss = 6.9455e-03, PNorm = 182.7399, GNorm = 0.1865, lr_0 = 1.7953e-04
Loss = 5.2121e-03, PNorm = 182.7433, GNorm = 0.0960, lr_0 = 1.7941e-04
Loss = 4.0546e-03, PNorm = 182.7462, GNorm = 0.0849, lr_0 = 1.7928e-04
Loss = 4.4795e-03, PNorm = 182.7503, GNorm = 0.1072, lr_0 = 1.7916e-04
Loss = 3.1280e-03, PNorm = 182.7550, GNorm = 0.0802, lr_0 = 1.7904e-04
Loss = 5.3744e-03, PNorm = 182.7578, GNorm = 0.0924, lr_0 = 1.7892e-04
Loss = 6.5710e-03, PNorm = 182.7588, GNorm = 0.1610, lr_0 = 1.7879e-04
Loss = 3.6489e-03, PNorm = 182.7632, GNorm = 0.2905, lr_0 = 1.7867e-04
Loss = 4.0604e-03, PNorm = 182.7685, GNorm = 0.0889, lr_0 = 1.7855e-04
Loss = 6.6073e-03, PNorm = 182.7725, GNorm = 0.2003, lr_0 = 1.7843e-04
Loss = 3.2311e-03, PNorm = 182.7771, GNorm = 0.3414, lr_0 = 1.7830e-04
Loss = 7.8832e-03, PNorm = 182.7803, GNorm = 0.0933, lr_0 = 1.7818e-04
Loss = 7.5953e-03, PNorm = 182.7842, GNorm = 0.1721, lr_0 = 1.7806e-04
Loss = 5.1600e-03, PNorm = 182.7889, GNorm = 0.2575, lr_0 = 1.7794e-04
Loss = 9.3201e-03, PNorm = 182.7925, GNorm = 0.2406, lr_0 = 1.7782e-04
Validation mae = 0.120625
Epoch 23
Loss = 5.6892e-03, PNorm = 182.7948, GNorm = 0.1855, lr_0 = 1.7769e-04
Loss = 2.8913e-03, PNorm = 182.7991, GNorm = 0.1212, lr_0 = 1.7757e-04
Loss = 3.8758e-03, PNorm = 182.8022, GNorm = 0.1439, lr_0 = 1.7745e-04
Loss = 2.7047e-03, PNorm = 182.8049, GNorm = 0.1103, lr_0 = 1.7733e-04
Loss = 2.5370e-03, PNorm = 182.8075, GNorm = 0.3100, lr_0 = 1.7721e-04
Loss = 3.8236e-03, PNorm = 182.8106, GNorm = 0.1135, lr_0 = 1.7709e-04
Loss = 3.5007e-03, PNorm = 182.8133, GNorm = 0.3139, lr_0 = 1.7696e-04
Loss = 2.3412e-03, PNorm = 182.8169, GNorm = 0.1462, lr_0 = 1.7684e-04
Loss = 4.3819e-03, PNorm = 182.8215, GNorm = 0.1976, lr_0 = 1.7672e-04
Loss = 4.4239e-03, PNorm = 182.8254, GNorm = 0.7633, lr_0 = 1.7660e-04
Loss = 4.1217e-03, PNorm = 182.8292, GNorm = 0.2525, lr_0 = 1.7648e-04
Loss = 5.5297e-03, PNorm = 182.8335, GNorm = 0.1228, lr_0 = 1.7636e-04
Loss = 6.8159e-03, PNorm = 182.8355, GNorm = 0.0942, lr_0 = 1.7624e-04
Loss = 4.8333e-03, PNorm = 182.8377, GNorm = 0.0664, lr_0 = 1.7612e-04
Loss = 3.1611e-03, PNorm = 182.8409, GNorm = 0.0942, lr_0 = 1.7600e-04
Loss = 5.1376e-03, PNorm = 182.8452, GNorm = 0.1630, lr_0 = 1.7588e-04
Loss = 5.5654e-03, PNorm = 182.8480, GNorm = 0.0901, lr_0 = 1.7576e-04
Loss = 2.9982e-03, PNorm = 182.8498, GNorm = 0.2439, lr_0 = 1.7564e-04
Loss = 2.6519e-03, PNorm = 182.8528, GNorm = 0.1551, lr_0 = 1.7552e-04
Loss = 2.8615e-03, PNorm = 182.8539, GNorm = 0.0882, lr_0 = 1.7540e-04
Loss = 3.3061e-03, PNorm = 182.8562, GNorm = 0.1550, lr_0 = 1.7528e-04
Loss = 2.4584e-03, PNorm = 182.8599, GNorm = 0.2086, lr_0 = 1.7516e-04
Loss = 7.5369e-03, PNorm = 182.8625, GNorm = 0.1277, lr_0 = 1.7504e-04
Loss = 3.0692e-03, PNorm = 182.8653, GNorm = 0.0890, lr_0 = 1.7492e-04
Loss = 2.5327e-03, PNorm = 182.8690, GNorm = 0.1857, lr_0 = 1.7480e-04
Loss = 2.9666e-03, PNorm = 182.8720, GNorm = 0.1636, lr_0 = 1.7468e-04
Loss = 6.6239e-03, PNorm = 182.8773, GNorm = 0.0751, lr_0 = 1.7456e-04
Loss = 4.0285e-03, PNorm = 182.8807, GNorm = 0.0820, lr_0 = 1.7444e-04
Loss = 4.0350e-03, PNorm = 182.8830, GNorm = 0.1127, lr_0 = 1.7432e-04
Loss = 4.0400e-03, PNorm = 182.8842, GNorm = 0.1717, lr_0 = 1.7420e-04
Loss = 4.8390e-03, PNorm = 182.8862, GNorm = 0.0568, lr_0 = 1.7408e-04
Loss = 5.2138e-03, PNorm = 182.8885, GNorm = 0.1579, lr_0 = 1.7396e-04
Loss = 6.0065e-03, PNorm = 182.8929, GNorm = 0.0781, lr_0 = 1.7384e-04
Loss = 6.0332e-03, PNorm = 182.8959, GNorm = 0.1292, lr_0 = 1.7372e-04
Loss = 4.8307e-03, PNorm = 182.8987, GNorm = 0.1047, lr_0 = 1.7360e-04
Loss = 7.0040e-03, PNorm = 182.9016, GNorm = 0.1378, lr_0 = 1.7348e-04
Loss = 6.8077e-03, PNorm = 182.9043, GNorm = 0.0822, lr_0 = 1.7336e-04
Loss = 4.5896e-03, PNorm = 182.9080, GNorm = 0.2407, lr_0 = 1.7325e-04
Loss = 4.7550e-03, PNorm = 182.9099, GNorm = 0.0981, lr_0 = 1.7313e-04
Loss = 2.6750e-03, PNorm = 182.9135, GNorm = 0.1155, lr_0 = 1.7301e-04
Loss = 3.0370e-03, PNorm = 182.9165, GNorm = 0.0574, lr_0 = 1.7289e-04
Loss = 6.9454e-03, PNorm = 182.9186, GNorm = 0.1258, lr_0 = 1.7277e-04
Loss = 4.2169e-03, PNorm = 182.9211, GNorm = 0.1759, lr_0 = 1.7265e-04
Loss = 5.1002e-03, PNorm = 182.9244, GNorm = 0.0764, lr_0 = 1.7253e-04
Loss = 2.5987e-03, PNorm = 182.9283, GNorm = 0.0765, lr_0 = 1.7242e-04
Loss = 2.4783e-03, PNorm = 182.9318, GNorm = 0.0711, lr_0 = 1.7230e-04
Loss = 3.5327e-03, PNorm = 182.9352, GNorm = 0.1341, lr_0 = 1.7218e-04
Loss = 2.0633e-03, PNorm = 182.9387, GNorm = 0.0953, lr_0 = 1.7206e-04
Loss = 2.4964e-03, PNorm = 182.9410, GNorm = 0.0663, lr_0 = 1.7194e-04
Loss = 1.4553e-02, PNorm = 182.9463, GNorm = 0.1449, lr_0 = 1.7183e-04
Loss = 3.9788e-03, PNorm = 182.9487, GNorm = 0.1310, lr_0 = 1.7171e-04
Loss = 3.3408e-03, PNorm = 182.9506, GNorm = 0.1110, lr_0 = 1.7159e-04
Loss = 6.4859e-03, PNorm = 182.9533, GNorm = 0.5454, lr_0 = 1.7147e-04
Loss = 8.2855e-03, PNorm = 182.9557, GNorm = 0.0917, lr_0 = 1.7136e-04
Loss = 4.6949e-03, PNorm = 182.9588, GNorm = 0.1835, lr_0 = 1.7124e-04
Loss = 1.0683e-02, PNorm = 182.9635, GNorm = 0.1500, lr_0 = 1.7112e-04
Loss = 5.4869e-03, PNorm = 182.9660, GNorm = 0.2591, lr_0 = 1.7100e-04
Loss = 4.9941e-03, PNorm = 182.9704, GNorm = 0.0845, lr_0 = 1.7089e-04
Loss = 7.6070e-03, PNorm = 182.9729, GNorm = 0.1756, lr_0 = 1.7077e-04
Loss = 3.7488e-03, PNorm = 182.9778, GNorm = 0.1252, lr_0 = 1.7065e-04
Loss = 6.9333e-03, PNorm = 182.9833, GNorm = 0.2340, lr_0 = 1.7054e-04
Loss = 6.2232e-03, PNorm = 182.9879, GNorm = 0.2514, lr_0 = 1.7042e-04
Loss = 2.8715e-03, PNorm = 182.9924, GNorm = 0.0884, lr_0 = 1.7030e-04
Loss = 6.9610e-03, PNorm = 182.9965, GNorm = 0.0937, lr_0 = 1.7019e-04
Loss = 4.6899e-03, PNorm = 182.9988, GNorm = 0.0695, lr_0 = 1.7007e-04
Loss = 4.3191e-03, PNorm = 183.0010, GNorm = 0.1777, lr_0 = 1.6995e-04
Loss = 3.4441e-03, PNorm = 183.0040, GNorm = 0.1121, lr_0 = 1.6984e-04
Loss = 3.2856e-03, PNorm = 183.0080, GNorm = 0.1961, lr_0 = 1.6972e-04
Loss = 6.6896e-03, PNorm = 183.0124, GNorm = 0.0642, lr_0 = 1.6960e-04
Loss = 2.7815e-03, PNorm = 183.0161, GNorm = 0.3537, lr_0 = 1.6949e-04
Loss = 3.0532e-03, PNorm = 183.0200, GNorm = 0.0717, lr_0 = 1.6937e-04
Loss = 6.5246e-03, PNorm = 183.0213, GNorm = 0.4343, lr_0 = 1.6926e-04
Loss = 2.8021e-03, PNorm = 183.0244, GNorm = 0.0865, lr_0 = 1.6914e-04
Loss = 3.7110e-03, PNorm = 183.0283, GNorm = 0.1484, lr_0 = 1.6902e-04
Loss = 4.1435e-03, PNorm = 183.0307, GNorm = 0.0898, lr_0 = 1.6891e-04
Loss = 2.3936e-03, PNorm = 183.0338, GNorm = 0.1315, lr_0 = 1.6879e-04
Loss = 2.7728e-03, PNorm = 183.0361, GNorm = 0.1879, lr_0 = 1.6868e-04
Loss = 3.5928e-03, PNorm = 183.0373, GNorm = 0.1102, lr_0 = 1.6856e-04
Loss = 2.4728e-03, PNorm = 183.0415, GNorm = 0.1648, lr_0 = 1.6845e-04
Loss = 3.2201e-03, PNorm = 183.0458, GNorm = 0.1144, lr_0 = 1.6833e-04
Loss = 3.3836e-03, PNorm = 183.0490, GNorm = 0.0775, lr_0 = 1.6821e-04
Loss = 2.5577e-03, PNorm = 183.0524, GNorm = 0.0657, lr_0 = 1.6810e-04
Loss = 6.3174e-03, PNorm = 183.0563, GNorm = 0.1249, lr_0 = 1.6798e-04
Loss = 4.1325e-03, PNorm = 183.0587, GNorm = 0.4993, lr_0 = 1.6787e-04
Loss = 9.7925e-03, PNorm = 183.0591, GNorm = 0.2770, lr_0 = 1.6775e-04
Loss = 2.3228e-03, PNorm = 183.0626, GNorm = 0.0725, lr_0 = 1.6764e-04
Loss = 7.9198e-03, PNorm = 183.0664, GNorm = 0.1936, lr_0 = 1.6752e-04
Loss = 4.2356e-03, PNorm = 183.0705, GNorm = 0.1129, lr_0 = 1.6741e-04
Loss = 4.2668e-03, PNorm = 183.0739, GNorm = 0.0718, lr_0 = 1.6729e-04
Loss = 3.0413e-03, PNorm = 183.0783, GNorm = 0.1829, lr_0 = 1.6718e-04
Loss = 3.1660e-03, PNorm = 183.0823, GNorm = 0.1580, lr_0 = 1.6707e-04
Loss = 6.2615e-03, PNorm = 183.0866, GNorm = 0.0494, lr_0 = 1.6695e-04
Loss = 3.5236e-03, PNorm = 183.0896, GNorm = 0.1111, lr_0 = 1.6684e-04
Loss = 2.6546e-03, PNorm = 183.0930, GNorm = 0.1457, lr_0 = 1.6672e-04
Loss = 2.5123e-03, PNorm = 183.0969, GNorm = 0.1580, lr_0 = 1.6661e-04
Loss = 4.7553e-03, PNorm = 183.0991, GNorm = 0.1736, lr_0 = 1.6649e-04
Loss = 1.1022e-02, PNorm = 183.1018, GNorm = 0.4336, lr_0 = 1.6638e-04
Loss = 2.3630e-03, PNorm = 183.1041, GNorm = 0.0873, lr_0 = 1.6627e-04
Loss = 2.4486e-03, PNorm = 183.1061, GNorm = 0.0620, lr_0 = 1.6615e-04
Loss = 2.7457e-03, PNorm = 183.1099, GNorm = 0.1718, lr_0 = 1.6604e-04
Loss = 4.9762e-03, PNorm = 183.1135, GNorm = 0.1797, lr_0 = 1.6592e-04
Loss = 6.3932e-03, PNorm = 183.1164, GNorm = 0.1183, lr_0 = 1.6581e-04
Loss = 4.2570e-03, PNorm = 183.1194, GNorm = 0.2103, lr_0 = 1.6570e-04
Loss = 7.1668e-03, PNorm = 183.1240, GNorm = 0.1169, lr_0 = 1.6558e-04
Loss = 7.5426e-03, PNorm = 183.1267, GNorm = 0.1296, lr_0 = 1.6547e-04
Loss = 5.8446e-03, PNorm = 183.1298, GNorm = 0.2484, lr_0 = 1.6536e-04
Loss = 2.4665e-03, PNorm = 183.1333, GNorm = 0.0884, lr_0 = 1.6524e-04
Loss = 4.5844e-03, PNorm = 183.1355, GNorm = 0.0947, lr_0 = 1.6513e-04
Loss = 5.4258e-03, PNorm = 183.1398, GNorm = 0.1741, lr_0 = 1.6502e-04
Loss = 4.8118e-03, PNorm = 183.1430, GNorm = 0.1207, lr_0 = 1.6490e-04
Loss = 2.1459e-03, PNorm = 183.1454, GNorm = 0.1002, lr_0 = 1.6479e-04
Loss = 5.0924e-03, PNorm = 183.1491, GNorm = 0.0826, lr_0 = 1.6468e-04
Loss = 3.2728e-03, PNorm = 183.1523, GNorm = 0.1321, lr_0 = 1.6457e-04
Loss = 2.3141e-03, PNorm = 183.1561, GNorm = 0.0782, lr_0 = 1.6445e-04
Loss = 7.0942e-03, PNorm = 183.1606, GNorm = 0.1861, lr_0 = 1.6434e-04
Loss = 6.0653e-03, PNorm = 183.1641, GNorm = 0.1916, lr_0 = 1.6423e-04
Loss = 3.8483e-03, PNorm = 183.1687, GNorm = 0.3344, lr_0 = 1.6412e-04
Loss = 5.2103e-03, PNorm = 183.1719, GNorm = 0.1305, lr_0 = 1.6400e-04
Loss = 4.3881e-03, PNorm = 183.1747, GNorm = 0.1020, lr_0 = 1.6389e-04
Loss = 2.5192e-03, PNorm = 183.1774, GNorm = 0.1601, lr_0 = 1.6378e-04
Validation mae = 0.120606
Epoch 24
Loss = 2.5898e-03, PNorm = 183.1793, GNorm = 0.1696, lr_0 = 1.6367e-04
Loss = 4.7011e-03, PNorm = 183.1813, GNorm = 0.1662, lr_0 = 1.6355e-04
Loss = 2.3000e-03, PNorm = 183.1842, GNorm = 0.0818, lr_0 = 1.6344e-04
Loss = 4.8970e-03, PNorm = 183.1851, GNorm = 0.1173, lr_0 = 1.6333e-04
Loss = 4.9793e-03, PNorm = 183.1875, GNorm = 0.0686, lr_0 = 1.6322e-04
Loss = 6.0843e-03, PNorm = 183.1901, GNorm = 0.0481, lr_0 = 1.6311e-04
Loss = 7.0525e-03, PNorm = 183.1925, GNorm = 0.0722, lr_0 = 1.6299e-04
Loss = 1.4626e-02, PNorm = 183.1945, GNorm = 0.1511, lr_0 = 1.6288e-04
Loss = 4.6110e-03, PNorm = 183.1982, GNorm = 0.2216, lr_0 = 1.6277e-04
Loss = 4.0673e-03, PNorm = 183.1994, GNorm = 0.1098, lr_0 = 1.6266e-04
Loss = 3.0617e-03, PNorm = 183.2035, GNorm = 0.1721, lr_0 = 1.6255e-04
Loss = 2.1039e-03, PNorm = 183.2065, GNorm = 0.1677, lr_0 = 1.6244e-04
Loss = 2.2249e-03, PNorm = 183.2082, GNorm = 0.1191, lr_0 = 1.6233e-04
Loss = 4.2288e-03, PNorm = 183.2100, GNorm = 0.1918, lr_0 = 1.6221e-04
Loss = 2.6234e-03, PNorm = 183.2110, GNorm = 0.2002, lr_0 = 1.6210e-04
Loss = 2.8143e-03, PNorm = 183.2115, GNorm = 0.0548, lr_0 = 1.6199e-04
Loss = 2.8717e-03, PNorm = 183.2145, GNorm = 0.1329, lr_0 = 1.6188e-04
Loss = 3.4342e-03, PNorm = 183.2161, GNorm = 0.0863, lr_0 = 1.6177e-04
Loss = 3.5441e-03, PNorm = 183.2183, GNorm = 0.0852, lr_0 = 1.6166e-04
Loss = 5.9516e-03, PNorm = 183.2210, GNorm = 0.3259, lr_0 = 1.6155e-04
Loss = 5.4182e-03, PNorm = 183.2230, GNorm = 0.0900, lr_0 = 1.6144e-04
Loss = 3.5572e-03, PNorm = 183.2270, GNorm = 0.1711, lr_0 = 1.6133e-04
Loss = 4.7255e-03, PNorm = 183.2292, GNorm = 0.4511, lr_0 = 1.6122e-04
Loss = 4.0752e-03, PNorm = 183.2298, GNorm = 0.1290, lr_0 = 1.6111e-04
Loss = 5.0589e-03, PNorm = 183.2330, GNorm = 0.1071, lr_0 = 1.6100e-04
Loss = 3.6419e-03, PNorm = 183.2364, GNorm = 0.3186, lr_0 = 1.6089e-04
Loss = 5.0586e-03, PNorm = 183.2396, GNorm = 0.4255, lr_0 = 1.6078e-04
Loss = 7.5404e-03, PNorm = 183.2419, GNorm = 0.0817, lr_0 = 1.6067e-04
Loss = 2.0279e-03, PNorm = 183.2461, GNorm = 0.0520, lr_0 = 1.6056e-04
Loss = 4.9035e-03, PNorm = 183.2495, GNorm = 0.2545, lr_0 = 1.6045e-04
Loss = 3.8823e-03, PNorm = 183.2526, GNorm = 0.1182, lr_0 = 1.6034e-04
Loss = 8.0012e-03, PNorm = 183.2570, GNorm = 0.1006, lr_0 = 1.6023e-04
Loss = 2.0492e-03, PNorm = 183.2605, GNorm = 0.0657, lr_0 = 1.6012e-04
Loss = 3.1687e-03, PNorm = 183.2635, GNorm = 0.1095, lr_0 = 1.6001e-04
Loss = 4.2853e-03, PNorm = 183.2659, GNorm = 0.0643, lr_0 = 1.5990e-04
Loss = 3.9541e-03, PNorm = 183.2681, GNorm = 0.1932, lr_0 = 1.5979e-04
Loss = 2.7117e-03, PNorm = 183.2711, GNorm = 0.0885, lr_0 = 1.5968e-04
Loss = 4.0096e-03, PNorm = 183.2730, GNorm = 0.2512, lr_0 = 1.5957e-04
Loss = 4.4447e-03, PNorm = 183.2738, GNorm = 0.1768, lr_0 = 1.5946e-04
Loss = 2.6968e-03, PNorm = 183.2763, GNorm = 0.1087, lr_0 = 1.5935e-04
Loss = 9.0710e-03, PNorm = 183.2793, GNorm = 0.3770, lr_0 = 1.5924e-04
Loss = 2.9345e-03, PNorm = 183.2825, GNorm = 0.1231, lr_0 = 1.5913e-04
Loss = 4.6345e-03, PNorm = 183.2846, GNorm = 0.3224, lr_0 = 1.5902e-04
Loss = 3.7819e-03, PNorm = 183.2860, GNorm = 0.1357, lr_0 = 1.5891e-04
Loss = 9.9735e-03, PNorm = 183.2875, GNorm = 0.2098, lr_0 = 1.5880e-04
Loss = 5.0035e-03, PNorm = 183.2904, GNorm = 0.1701, lr_0 = 1.5870e-04
Loss = 2.9706e-03, PNorm = 183.2924, GNorm = 0.1214, lr_0 = 1.5859e-04
Loss = 3.6420e-03, PNorm = 183.2948, GNorm = 0.1279, lr_0 = 1.5848e-04
Loss = 7.1077e-03, PNorm = 183.2982, GNorm = 0.2709, lr_0 = 1.5837e-04
Loss = 6.6626e-03, PNorm = 183.3013, GNorm = 0.0772, lr_0 = 1.5826e-04
Loss = 6.7249e-03, PNorm = 183.3036, GNorm = 0.5349, lr_0 = 1.5815e-04
Loss = 2.6745e-03, PNorm = 183.3046, GNorm = 0.2393, lr_0 = 1.5804e-04
Loss = 2.4398e-03, PNorm = 183.3066, GNorm = 0.1486, lr_0 = 1.5794e-04
Loss = 2.2270e-03, PNorm = 183.3105, GNorm = 0.1085, lr_0 = 1.5783e-04
Loss = 4.6248e-03, PNorm = 183.3144, GNorm = 0.1206, lr_0 = 1.5772e-04
Loss = 2.9873e-03, PNorm = 183.3198, GNorm = 0.2662, lr_0 = 1.5761e-04
Loss = 5.9995e-03, PNorm = 183.3239, GNorm = 0.3513, lr_0 = 1.5750e-04
Loss = 3.4745e-03, PNorm = 183.3278, GNorm = 0.1784, lr_0 = 1.5740e-04
Loss = 4.3442e-03, PNorm = 183.3319, GNorm = 0.1005, lr_0 = 1.5729e-04
Loss = 3.4996e-03, PNorm = 183.3342, GNorm = 0.1644, lr_0 = 1.5718e-04
Loss = 5.3456e-03, PNorm = 183.3352, GNorm = 0.1235, lr_0 = 1.5707e-04
Loss = 5.7090e-03, PNorm = 183.3371, GNorm = 0.0971, lr_0 = 1.5697e-04
Loss = 2.1451e-03, PNorm = 183.3386, GNorm = 0.0548, lr_0 = 1.5686e-04
Loss = 3.0120e-03, PNorm = 183.3397, GNorm = 0.0826, lr_0 = 1.5675e-04
Loss = 4.0064e-03, PNorm = 183.3421, GNorm = 0.1807, lr_0 = 1.5664e-04
Loss = 2.6068e-03, PNorm = 183.3459, GNorm = 0.2000, lr_0 = 1.5654e-04
Loss = 3.0609e-03, PNorm = 183.3482, GNorm = 0.0865, lr_0 = 1.5643e-04
Loss = 7.2298e-03, PNorm = 183.3500, GNorm = 0.1194, lr_0 = 1.5632e-04
Loss = 3.2456e-03, PNorm = 183.3547, GNorm = 0.1106, lr_0 = 1.5621e-04
Loss = 2.1738e-03, PNorm = 183.3583, GNorm = 0.1936, lr_0 = 1.5611e-04
Loss = 1.7379e-03, PNorm = 183.3609, GNorm = 0.1065, lr_0 = 1.5600e-04
Loss = 2.1495e-03, PNorm = 183.3637, GNorm = 0.1107, lr_0 = 1.5589e-04
Loss = 3.6833e-03, PNorm = 183.3675, GNorm = 0.3518, lr_0 = 1.5579e-04
Loss = 2.8298e-03, PNorm = 183.3720, GNorm = 0.0848, lr_0 = 1.5568e-04
Loss = 3.3515e-03, PNorm = 183.3752, GNorm = 0.2057, lr_0 = 1.5557e-04
Loss = 4.9978e-03, PNorm = 183.3777, GNorm = 0.1174, lr_0 = 1.5547e-04
Loss = 2.2305e-03, PNorm = 183.3802, GNorm = 0.0566, lr_0 = 1.5536e-04
Loss = 3.9795e-03, PNorm = 183.3817, GNorm = 0.1879, lr_0 = 1.5525e-04
Loss = 7.2104e-03, PNorm = 183.3822, GNorm = 0.1970, lr_0 = 1.5515e-04
Loss = 8.3678e-03, PNorm = 183.3858, GNorm = 0.0929, lr_0 = 1.5504e-04
Loss = 2.0572e-03, PNorm = 183.3899, GNorm = 0.0775, lr_0 = 1.5493e-04
Loss = 7.0608e-03, PNorm = 183.3934, GNorm = 0.7112, lr_0 = 1.5483e-04
Loss = 6.1615e-03, PNorm = 183.3972, GNorm = 0.1601, lr_0 = 1.5472e-04
Loss = 5.3114e-03, PNorm = 183.4000, GNorm = 0.3694, lr_0 = 1.5462e-04
Loss = 5.2138e-03, PNorm = 183.4043, GNorm = 0.0678, lr_0 = 1.5451e-04
Loss = 5.0244e-03, PNorm = 183.4074, GNorm = 0.0528, lr_0 = 1.5440e-04
Loss = 4.6758e-03, PNorm = 183.4105, GNorm = 0.1183, lr_0 = 1.5430e-04
Loss = 3.8715e-03, PNorm = 183.4138, GNorm = 0.0841, lr_0 = 1.5419e-04
Loss = 3.0783e-03, PNorm = 183.4166, GNorm = 0.2029, lr_0 = 1.5409e-04
Loss = 3.0288e-03, PNorm = 183.4192, GNorm = 0.1930, lr_0 = 1.5398e-04
Loss = 2.9170e-03, PNorm = 183.4211, GNorm = 0.1091, lr_0 = 1.5388e-04
Loss = 9.5126e-03, PNorm = 183.4233, GNorm = 0.0411, lr_0 = 1.5377e-04
Loss = 6.0996e-03, PNorm = 183.4265, GNorm = 1.2981, lr_0 = 1.5367e-04
Loss = 3.4335e-03, PNorm = 183.4295, GNorm = 0.1609, lr_0 = 1.5356e-04
Loss = 4.6175e-03, PNorm = 183.4309, GNorm = 0.1919, lr_0 = 1.5346e-04
Loss = 2.0997e-03, PNorm = 183.4347, GNorm = 0.0781, lr_0 = 1.5335e-04
Loss = 1.9936e-03, PNorm = 183.4386, GNorm = 0.1863, lr_0 = 1.5325e-04
Loss = 1.0161e-02, PNorm = 183.4397, GNorm = 0.1486, lr_0 = 1.5314e-04
Loss = 2.3769e-03, PNorm = 183.4415, GNorm = 0.1586, lr_0 = 1.5304e-04
Loss = 2.0364e-03, PNorm = 183.4441, GNorm = 0.0745, lr_0 = 1.5293e-04
Loss = 2.5639e-03, PNorm = 183.4458, GNorm = 0.0700, lr_0 = 1.5283e-04
Loss = 2.0020e-03, PNorm = 183.4470, GNorm = 0.0955, lr_0 = 1.5272e-04
Loss = 2.0224e-03, PNorm = 183.4491, GNorm = 0.1927, lr_0 = 1.5262e-04
Loss = 8.0436e-03, PNorm = 183.4539, GNorm = 0.1646, lr_0 = 1.5251e-04
Loss = 3.1128e-03, PNorm = 183.4571, GNorm = 0.0938, lr_0 = 1.5241e-04
Loss = 2.4160e-03, PNorm = 183.4573, GNorm = 0.0973, lr_0 = 1.5230e-04
Loss = 4.4520e-03, PNorm = 183.4599, GNorm = 0.1201, lr_0 = 1.5220e-04
Loss = 2.8133e-03, PNorm = 183.4617, GNorm = 0.1004, lr_0 = 1.5209e-04
Loss = 3.1423e-03, PNorm = 183.4645, GNorm = 0.1084, lr_0 = 1.5199e-04
Loss = 4.3720e-03, PNorm = 183.4658, GNorm = 0.1093, lr_0 = 1.5189e-04
Loss = 3.4173e-03, PNorm = 183.4671, GNorm = 0.1488, lr_0 = 1.5178e-04
Loss = 4.0217e-03, PNorm = 183.4696, GNorm = 0.2508, lr_0 = 1.5168e-04
Loss = 1.8691e-03, PNorm = 183.4725, GNorm = 0.1491, lr_0 = 1.5157e-04
Loss = 3.5404e-03, PNorm = 183.4754, GNorm = 0.1538, lr_0 = 1.5147e-04
Loss = 4.4264e-03, PNorm = 183.4782, GNorm = 0.1515, lr_0 = 1.5137e-04
Loss = 2.2523e-03, PNorm = 183.4818, GNorm = 0.0830, lr_0 = 1.5126e-04
Loss = 2.2467e-03, PNorm = 183.4854, GNorm = 0.1105, lr_0 = 1.5116e-04
Loss = 2.1222e-03, PNorm = 183.4877, GNorm = 0.0836, lr_0 = 1.5106e-04
Loss = 2.8061e-03, PNorm = 183.4891, GNorm = 0.1520, lr_0 = 1.5095e-04
Loss = 2.0403e-03, PNorm = 183.4902, GNorm = 0.1991, lr_0 = 1.5085e-04
Validation mae = 0.120744
Epoch 25
Loss = 2.8588e-03, PNorm = 183.4912, GNorm = 0.1713, lr_0 = 1.5075e-04
Loss = 3.8903e-03, PNorm = 183.4920, GNorm = 0.1132, lr_0 = 1.5064e-04
Loss = 2.2831e-03, PNorm = 183.4940, GNorm = 0.0429, lr_0 = 1.5054e-04
Loss = 2.1188e-03, PNorm = 183.4958, GNorm = 0.0525, lr_0 = 1.5044e-04
Loss = 3.1325e-03, PNorm = 183.4978, GNorm = 0.1695, lr_0 = 1.5033e-04
Loss = 3.0340e-03, PNorm = 183.4994, GNorm = 0.7413, lr_0 = 1.5023e-04
Loss = 4.0776e-03, PNorm = 183.5014, GNorm = 0.1532, lr_0 = 1.5013e-04
Loss = 1.8794e-03, PNorm = 183.5044, GNorm = 0.1439, lr_0 = 1.5002e-04
Loss = 3.4127e-03, PNorm = 183.5069, GNorm = 0.2123, lr_0 = 1.4992e-04
Loss = 3.9677e-03, PNorm = 183.5094, GNorm = 0.5555, lr_0 = 1.4982e-04
Loss = 6.7761e-03, PNorm = 183.5112, GNorm = 0.0749, lr_0 = 1.4972e-04
Loss = 5.1976e-03, PNorm = 183.5141, GNorm = 0.1554, lr_0 = 1.4961e-04
Loss = 3.0607e-03, PNorm = 183.5155, GNorm = 0.1326, lr_0 = 1.4951e-04
Loss = 2.0457e-03, PNorm = 183.5183, GNorm = 0.1550, lr_0 = 1.4941e-04
Loss = 5.7955e-03, PNorm = 183.5210, GNorm = 0.0941, lr_0 = 1.4931e-04
Loss = 3.5415e-03, PNorm = 183.5232, GNorm = 0.0606, lr_0 = 1.4920e-04
Loss = 4.6387e-03, PNorm = 183.5267, GNorm = 0.1639, lr_0 = 1.4910e-04
Loss = 4.9179e-03, PNorm = 183.5300, GNorm = 0.0652, lr_0 = 1.4900e-04
Loss = 3.4115e-03, PNorm = 183.5330, GNorm = 0.0941, lr_0 = 1.4890e-04
Loss = 1.9714e-03, PNorm = 183.5355, GNorm = 0.0687, lr_0 = 1.4880e-04
Loss = 6.2820e-03, PNorm = 183.5388, GNorm = 0.0574, lr_0 = 1.4869e-04
Loss = 4.0757e-03, PNorm = 183.5404, GNorm = 0.0984, lr_0 = 1.4859e-04
Loss = 3.0440e-03, PNorm = 183.5411, GNorm = 0.1849, lr_0 = 1.4849e-04
Loss = 6.7852e-03, PNorm = 183.5434, GNorm = 0.1478, lr_0 = 1.4839e-04
Loss = 3.1006e-03, PNorm = 183.5437, GNorm = 0.1995, lr_0 = 1.4829e-04
Loss = 4.6173e-03, PNorm = 183.5450, GNorm = 0.1294, lr_0 = 1.4818e-04
Loss = 2.8021e-03, PNorm = 183.5471, GNorm = 0.1722, lr_0 = 1.4808e-04
Loss = 4.8241e-03, PNorm = 183.5489, GNorm = 0.2200, lr_0 = 1.4798e-04
Loss = 2.3966e-03, PNorm = 183.5515, GNorm = 0.0808, lr_0 = 1.4788e-04
Loss = 2.1685e-03, PNorm = 183.5549, GNorm = 0.1304, lr_0 = 1.4778e-04
Loss = 3.6647e-03, PNorm = 183.5563, GNorm = 0.0626, lr_0 = 1.4768e-04
Loss = 4.0493e-03, PNorm = 183.5574, GNorm = 0.1147, lr_0 = 1.4758e-04
Loss = 4.2224e-03, PNorm = 183.5588, GNorm = 0.1749, lr_0 = 1.4748e-04
Loss = 9.0785e-03, PNorm = 183.5608, GNorm = 0.1313, lr_0 = 1.4737e-04
Loss = 1.9427e-03, PNorm = 183.5630, GNorm = 0.1053, lr_0 = 1.4727e-04
Loss = 4.4910e-03, PNorm = 183.5654, GNorm = 0.1452, lr_0 = 1.4717e-04
Loss = 8.8233e-03, PNorm = 183.5683, GNorm = 0.0954, lr_0 = 1.4707e-04
Loss = 2.4887e-03, PNorm = 183.5708, GNorm = 0.1208, lr_0 = 1.4697e-04
Loss = 1.9617e-03, PNorm = 183.5731, GNorm = 0.1276, lr_0 = 1.4687e-04
Loss = 5.4656e-03, PNorm = 183.5755, GNorm = 0.1035, lr_0 = 1.4677e-04
Loss = 2.4658e-03, PNorm = 183.5784, GNorm = 0.1152, lr_0 = 1.4667e-04
Loss = 2.4627e-03, PNorm = 183.5816, GNorm = 0.1284, lr_0 = 1.4657e-04
Loss = 4.0326e-03, PNorm = 183.5834, GNorm = 0.1734, lr_0 = 1.4647e-04
Loss = 3.7724e-03, PNorm = 183.5863, GNorm = 0.1087, lr_0 = 1.4637e-04
Loss = 1.7989e-03, PNorm = 183.5889, GNorm = 0.0979, lr_0 = 1.4627e-04
Loss = 4.6736e-03, PNorm = 183.5902, GNorm = 0.1400, lr_0 = 1.4617e-04
Loss = 3.1653e-03, PNorm = 183.5931, GNorm = 0.2192, lr_0 = 1.4607e-04
Loss = 3.1974e-03, PNorm = 183.5955, GNorm = 0.1390, lr_0 = 1.4597e-04
Loss = 2.5598e-03, PNorm = 183.5986, GNorm = 0.0883, lr_0 = 1.4587e-04
Loss = 1.8889e-02, PNorm = 183.6033, GNorm = 0.2316, lr_0 = 1.4577e-04
Loss = 5.0748e-03, PNorm = 183.6038, GNorm = 0.2205, lr_0 = 1.4567e-04
Loss = 4.7098e-03, PNorm = 183.6049, GNorm = 0.2430, lr_0 = 1.4557e-04
Loss = 3.9760e-03, PNorm = 183.6074, GNorm = 0.0960, lr_0 = 1.4547e-04
Loss = 1.6521e-03, PNorm = 183.6094, GNorm = 0.0837, lr_0 = 1.4537e-04
Loss = 2.1400e-03, PNorm = 183.6118, GNorm = 0.0515, lr_0 = 1.4527e-04
Loss = 2.2056e-03, PNorm = 183.6144, GNorm = 0.0796, lr_0 = 1.4517e-04
Loss = 2.4381e-03, PNorm = 183.6170, GNorm = 0.1180, lr_0 = 1.4507e-04
Loss = 5.6152e-03, PNorm = 183.6207, GNorm = 0.0527, lr_0 = 1.4497e-04
Loss = 6.6581e-03, PNorm = 183.6225, GNorm = 0.1990, lr_0 = 1.4487e-04
Loss = 3.7931e-03, PNorm = 183.6256, GNorm = 0.1173, lr_0 = 1.4477e-04
Loss = 3.4294e-03, PNorm = 183.6296, GNorm = 0.0516, lr_0 = 1.4467e-04
Loss = 3.1116e-03, PNorm = 183.6311, GNorm = 0.0427, lr_0 = 1.4457e-04
Loss = 3.5000e-03, PNorm = 183.6316, GNorm = 0.1211, lr_0 = 1.4447e-04
Loss = 6.6389e-03, PNorm = 183.6325, GNorm = 0.0619, lr_0 = 1.4438e-04
Loss = 2.7467e-03, PNorm = 183.6349, GNorm = 0.1563, lr_0 = 1.4428e-04
Loss = 4.9764e-03, PNorm = 183.6384, GNorm = 0.0847, lr_0 = 1.4418e-04
Loss = 1.8324e-03, PNorm = 183.6409, GNorm = 0.0547, lr_0 = 1.4408e-04
Loss = 1.8753e-03, PNorm = 183.6443, GNorm = 0.0792, lr_0 = 1.4398e-04
Loss = 2.8121e-03, PNorm = 183.6479, GNorm = 0.1967, lr_0 = 1.4388e-04
Loss = 4.3784e-03, PNorm = 183.6505, GNorm = 0.0636, lr_0 = 1.4378e-04
Loss = 6.0186e-03, PNorm = 183.6516, GNorm = 0.1314, lr_0 = 1.4368e-04
Loss = 1.8257e-03, PNorm = 183.6517, GNorm = 0.1155, lr_0 = 1.4359e-04
Loss = 1.6340e-03, PNorm = 183.6528, GNorm = 0.0980, lr_0 = 1.4349e-04
Loss = 2.7403e-03, PNorm = 183.6551, GNorm = 0.1429, lr_0 = 1.4339e-04
Loss = 3.8416e-03, PNorm = 183.6573, GNorm = 0.1261, lr_0 = 1.4329e-04
Loss = 2.6154e-03, PNorm = 183.6593, GNorm = 0.1818, lr_0 = 1.4319e-04
Loss = 2.5222e-03, PNorm = 183.6621, GNorm = 0.1370, lr_0 = 1.4310e-04
Loss = 4.7973e-03, PNorm = 183.6639, GNorm = 0.1427, lr_0 = 1.4300e-04
Loss = 1.9329e-03, PNorm = 183.6654, GNorm = 0.0880, lr_0 = 1.4290e-04
Loss = 5.8222e-03, PNorm = 183.6671, GNorm = 0.8488, lr_0 = 1.4280e-04
Loss = 6.6378e-03, PNorm = 183.6700, GNorm = 0.2679, lr_0 = 1.4270e-04
Loss = 4.3346e-03, PNorm = 183.6731, GNorm = 0.1118, lr_0 = 1.4261e-04
Loss = 5.6926e-03, PNorm = 183.6767, GNorm = 0.1225, lr_0 = 1.4251e-04
Loss = 3.3242e-03, PNorm = 183.6801, GNorm = 0.0731, lr_0 = 1.4241e-04
Loss = 2.3924e-03, PNorm = 183.6828, GNorm = 0.0827, lr_0 = 1.4231e-04
Loss = 1.8864e-03, PNorm = 183.6846, GNorm = 0.0684, lr_0 = 1.4222e-04
Loss = 3.7435e-03, PNorm = 183.6861, GNorm = 0.0842, lr_0 = 1.4212e-04
Loss = 2.7463e-03, PNorm = 183.6880, GNorm = 0.0906, lr_0 = 1.4202e-04
Loss = 2.0009e-03, PNorm = 183.6898, GNorm = 0.1272, lr_0 = 1.4192e-04
Loss = 5.3074e-03, PNorm = 183.6923, GNorm = 0.0866, lr_0 = 1.4183e-04
Loss = 1.7209e-03, PNorm = 183.6939, GNorm = 0.1055, lr_0 = 1.4173e-04
Loss = 3.0236e-03, PNorm = 183.6946, GNorm = 0.0897, lr_0 = 1.4163e-04
Loss = 2.9018e-03, PNorm = 183.6974, GNorm = 0.0944, lr_0 = 1.4153e-04
Loss = 3.1135e-03, PNorm = 183.6979, GNorm = 0.0938, lr_0 = 1.4144e-04
Loss = 5.2614e-03, PNorm = 183.6997, GNorm = 0.1368, lr_0 = 1.4134e-04
Loss = 4.8344e-03, PNorm = 183.7032, GNorm = 0.2559, lr_0 = 1.4124e-04
Loss = 1.8021e-03, PNorm = 183.7054, GNorm = 0.1256, lr_0 = 1.4115e-04
Loss = 1.2667e-02, PNorm = 183.7057, GNorm = 1.3771, lr_0 = 1.4105e-04
Loss = 5.7031e-03, PNorm = 183.7105, GNorm = 0.4507, lr_0 = 1.4095e-04
Loss = 4.5018e-03, PNorm = 183.7160, GNorm = 0.0633, lr_0 = 1.4086e-04
Loss = 1.9355e-03, PNorm = 183.7202, GNorm = 0.0988, lr_0 = 1.4076e-04
Loss = 6.3462e-03, PNorm = 183.7241, GNorm = 0.2743, lr_0 = 1.4066e-04
Loss = 2.2817e-03, PNorm = 183.7281, GNorm = 0.0891, lr_0 = 1.4057e-04
Loss = 3.5268e-03, PNorm = 183.7316, GNorm = 0.0846, lr_0 = 1.4047e-04
Loss = 2.8754e-03, PNorm = 183.7343, GNorm = 0.1232, lr_0 = 1.4038e-04
Loss = 6.3518e-03, PNorm = 183.7368, GNorm = 0.1260, lr_0 = 1.4028e-04
Loss = 1.9053e-03, PNorm = 183.7387, GNorm = 0.1435, lr_0 = 1.4018e-04
Loss = 6.0077e-03, PNorm = 183.7409, GNorm = 0.0785, lr_0 = 1.4009e-04
Loss = 2.0935e-03, PNorm = 183.7441, GNorm = 0.1378, lr_0 = 1.3999e-04
Loss = 2.7518e-03, PNorm = 183.7463, GNorm = 0.0693, lr_0 = 1.3990e-04
Loss = 7.5070e-03, PNorm = 183.7501, GNorm = 0.0982, lr_0 = 1.3980e-04
Loss = 4.0796e-03, PNorm = 183.7520, GNorm = 0.5465, lr_0 = 1.3970e-04
Loss = 1.8159e-03, PNorm = 183.7538, GNorm = 0.0557, lr_0 = 1.3961e-04
Loss = 5.4657e-03, PNorm = 183.7556, GNorm = 0.5187, lr_0 = 1.3951e-04
Loss = 2.0836e-03, PNorm = 183.7559, GNorm = 0.0808, lr_0 = 1.3942e-04
Loss = 1.6726e-03, PNorm = 183.7576, GNorm = 0.0646, lr_0 = 1.3932e-04
Loss = 4.3915e-03, PNorm = 183.7598, GNorm = 0.1412, lr_0 = 1.3923e-04
Loss = 3.1307e-03, PNorm = 183.7630, GNorm = 0.0695, lr_0 = 1.3913e-04
Loss = 6.3502e-03, PNorm = 183.7658, GNorm = 0.0535, lr_0 = 1.3904e-04
Loss = 1.7800e-03, PNorm = 183.7689, GNorm = 0.0698, lr_0 = 1.3894e-04
Validation mae = 0.120787
Epoch 26
Loss = 3.0808e-03, PNorm = 183.7730, GNorm = 0.1176, lr_0 = 1.3884e-04
Loss = 5.5649e-03, PNorm = 183.7730, GNorm = 0.1998, lr_0 = 1.3875e-04
Loss = 6.4295e-03, PNorm = 183.7734, GNorm = 0.0866, lr_0 = 1.3865e-04
Loss = 1.8011e-03, PNorm = 183.7749, GNorm = 0.0725, lr_0 = 1.3856e-04
Loss = 1.9666e-03, PNorm = 183.7759, GNorm = 0.0640, lr_0 = 1.3846e-04
Loss = 3.1155e-03, PNorm = 183.7771, GNorm = 0.0523, lr_0 = 1.3837e-04
Loss = 5.0041e-03, PNorm = 183.7786, GNorm = 0.0521, lr_0 = 1.3828e-04
Loss = 6.3136e-03, PNorm = 183.7800, GNorm = 0.2978, lr_0 = 1.3818e-04
Loss = 5.4056e-03, PNorm = 183.7827, GNorm = 0.6303, lr_0 = 1.3809e-04
Loss = 1.9883e-03, PNorm = 183.7848, GNorm = 0.1949, lr_0 = 1.3799e-04
Loss = 6.2495e-03, PNorm = 183.7860, GNorm = 0.5808, lr_0 = 1.3790e-04
Loss = 3.5194e-03, PNorm = 183.7868, GNorm = 0.0881, lr_0 = 1.3780e-04
Loss = 4.3052e-03, PNorm = 183.7883, GNorm = 0.1923, lr_0 = 1.3771e-04
Loss = 3.0659e-03, PNorm = 183.7890, GNorm = 0.0701, lr_0 = 1.3761e-04
Loss = 3.6168e-03, PNorm = 183.7913, GNorm = 0.0756, lr_0 = 1.3752e-04
Loss = 1.8415e-03, PNorm = 183.7932, GNorm = 0.1153, lr_0 = 1.3742e-04
Loss = 1.7506e-03, PNorm = 183.7944, GNorm = 0.0917, lr_0 = 1.3733e-04
Loss = 3.3423e-03, PNorm = 183.7969, GNorm = 0.1162, lr_0 = 1.3724e-04
Loss = 3.5953e-03, PNorm = 183.8006, GNorm = 0.0639, lr_0 = 1.3714e-04
Loss = 1.9883e-03, PNorm = 183.8038, GNorm = 0.1791, lr_0 = 1.3705e-04
Loss = 4.3042e-03, PNorm = 183.8060, GNorm = 0.1752, lr_0 = 1.3695e-04
Loss = 3.0207e-03, PNorm = 183.8083, GNorm = 0.1376, lr_0 = 1.3686e-04
Loss = 1.9749e-03, PNorm = 183.8103, GNorm = 0.1090, lr_0 = 1.3677e-04
Loss = 2.6998e-03, PNorm = 183.8121, GNorm = 0.1194, lr_0 = 1.3667e-04
Loss = 1.4105e-03, PNorm = 183.8144, GNorm = 0.0693, lr_0 = 1.3658e-04
Loss = 3.2643e-03, PNorm = 183.8184, GNorm = 0.0940, lr_0 = 1.3649e-04
Loss = 3.2407e-03, PNorm = 183.8209, GNorm = 0.1253, lr_0 = 1.3639e-04
Loss = 1.7195e-03, PNorm = 183.8231, GNorm = 0.0655, lr_0 = 1.3630e-04
Loss = 4.5005e-03, PNorm = 183.8250, GNorm = 0.1219, lr_0 = 1.3621e-04
Loss = 1.8025e-03, PNorm = 183.8269, GNorm = 0.1057, lr_0 = 1.3611e-04
Loss = 5.0389e-03, PNorm = 183.8282, GNorm = 0.1205, lr_0 = 1.3602e-04
Loss = 4.3490e-03, PNorm = 183.8304, GNorm = 0.0380, lr_0 = 1.3593e-04
Loss = 3.4644e-03, PNorm = 183.8327, GNorm = 0.1684, lr_0 = 1.3583e-04
Loss = 2.1681e-03, PNorm = 183.8337, GNorm = 0.0604, lr_0 = 1.3574e-04
Loss = 2.9466e-03, PNorm = 183.8350, GNorm = 0.0976, lr_0 = 1.3565e-04
Loss = 2.1864e-03, PNorm = 183.8352, GNorm = 0.1998, lr_0 = 1.3555e-04
Loss = 1.3454e-03, PNorm = 183.8364, GNorm = 0.0494, lr_0 = 1.3546e-04
Loss = 1.9163e-03, PNorm = 183.8373, GNorm = 0.1120, lr_0 = 1.3537e-04
Loss = 1.2334e-03, PNorm = 183.8387, GNorm = 0.1159, lr_0 = 1.3528e-04
Loss = 2.2513e-03, PNorm = 183.8397, GNorm = 0.1321, lr_0 = 1.3518e-04
Loss = 2.7804e-03, PNorm = 183.8413, GNorm = 0.0764, lr_0 = 1.3509e-04
Loss = 3.9148e-03, PNorm = 183.8437, GNorm = 0.0415, lr_0 = 1.3500e-04
Loss = 1.7512e-03, PNorm = 183.8447, GNorm = 0.1477, lr_0 = 1.3491e-04
Loss = 5.6823e-03, PNorm = 183.8464, GNorm = 0.1429, lr_0 = 1.3481e-04
Loss = 2.9258e-03, PNorm = 183.8475, GNorm = 0.0499, lr_0 = 1.3472e-04
Loss = 3.1287e-03, PNorm = 183.8493, GNorm = 0.1546, lr_0 = 1.3463e-04
Loss = 1.7822e-03, PNorm = 183.8516, GNorm = 0.1302, lr_0 = 1.3454e-04
Loss = 1.8393e-03, PNorm = 183.8534, GNorm = 0.0833, lr_0 = 1.3444e-04
Loss = 2.0523e-03, PNorm = 183.8555, GNorm = 0.0815, lr_0 = 1.3435e-04
Loss = 2.3971e-03, PNorm = 183.8577, GNorm = 0.1046, lr_0 = 1.3426e-04
Loss = 1.4777e-03, PNorm = 183.8601, GNorm = 0.0587, lr_0 = 1.3417e-04
Loss = 1.5908e-03, PNorm = 183.8612, GNorm = 0.1284, lr_0 = 1.3408e-04
Loss = 3.9561e-03, PNorm = 183.8613, GNorm = 0.1301, lr_0 = 1.3398e-04
Loss = 2.5230e-03, PNorm = 183.8620, GNorm = 0.1262, lr_0 = 1.3389e-04
Loss = 1.5641e-03, PNorm = 183.8646, GNorm = 0.1049, lr_0 = 1.3380e-04
Loss = 2.1623e-03, PNorm = 183.8667, GNorm = 0.0975, lr_0 = 1.3371e-04
Loss = 2.7245e-03, PNorm = 183.8685, GNorm = 0.0542, lr_0 = 1.3362e-04
Loss = 4.5201e-03, PNorm = 183.8711, GNorm = 0.1996, lr_0 = 1.3353e-04
Loss = 1.2528e-03, PNorm = 183.8738, GNorm = 0.0914, lr_0 = 1.3343e-04
Loss = 2.9704e-03, PNorm = 183.8757, GNorm = 0.1641, lr_0 = 1.3334e-04
Loss = 1.3978e-03, PNorm = 183.8775, GNorm = 0.1149, lr_0 = 1.3325e-04
Loss = 3.1331e-03, PNorm = 183.8789, GNorm = 0.1391, lr_0 = 1.3316e-04
Loss = 8.0962e-03, PNorm = 183.8815, GNorm = 0.6398, lr_0 = 1.3307e-04
Loss = 4.9135e-03, PNorm = 183.8837, GNorm = 0.0860, lr_0 = 1.3298e-04
Loss = 2.0069e-03, PNorm = 183.8868, GNorm = 0.0561, lr_0 = 1.3289e-04
Loss = 1.5616e-02, PNorm = 183.8899, GNorm = 0.1083, lr_0 = 1.3280e-04
Loss = 6.6994e-03, PNorm = 183.8899, GNorm = 0.0869, lr_0 = 1.3270e-04
Loss = 3.4058e-03, PNorm = 183.8904, GNorm = 0.2442, lr_0 = 1.3261e-04
Loss = 4.2685e-03, PNorm = 183.8925, GNorm = 0.1238, lr_0 = 1.3252e-04
Loss = 3.0879e-03, PNorm = 183.8932, GNorm = 0.1320, lr_0 = 1.3243e-04
Loss = 7.8554e-03, PNorm = 183.8934, GNorm = 0.0779, lr_0 = 1.3234e-04
Loss = 3.9402e-03, PNorm = 183.8938, GNorm = 0.1587, lr_0 = 1.3225e-04
Loss = 2.8118e-03, PNorm = 183.8961, GNorm = 0.0479, lr_0 = 1.3216e-04
Loss = 3.3346e-03, PNorm = 183.8972, GNorm = 0.0506, lr_0 = 1.3207e-04
Loss = 5.9784e-03, PNorm = 183.8997, GNorm = 0.1210, lr_0 = 1.3198e-04
Loss = 8.4316e-03, PNorm = 183.9024, GNorm = 0.0758, lr_0 = 1.3189e-04
Loss = 2.3465e-03, PNorm = 183.9043, GNorm = 0.0558, lr_0 = 1.3180e-04
Loss = 4.0324e-03, PNorm = 183.9058, GNorm = 0.0764, lr_0 = 1.3171e-04
Loss = 7.5279e-03, PNorm = 183.9066, GNorm = 0.1305, lr_0 = 1.3162e-04
Loss = 7.8641e-03, PNorm = 183.9064, GNorm = 0.1198, lr_0 = 1.3153e-04
Loss = 2.6483e-03, PNorm = 183.9068, GNorm = 0.0825, lr_0 = 1.3144e-04
Loss = 8.0407e-03, PNorm = 183.9088, GNorm = 0.1459, lr_0 = 1.3135e-04
Loss = 2.4572e-03, PNorm = 183.9100, GNorm = 0.7072, lr_0 = 1.3126e-04
Loss = 1.3700e-03, PNorm = 183.9116, GNorm = 0.0565, lr_0 = 1.3117e-04
Loss = 8.7194e-03, PNorm = 183.9133, GNorm = 0.1220, lr_0 = 1.3108e-04
Loss = 8.1068e-03, PNorm = 183.9162, GNorm = 0.0725, lr_0 = 1.3099e-04
Loss = 3.3413e-03, PNorm = 183.9178, GNorm = 0.0681, lr_0 = 1.3090e-04
Loss = 2.3274e-03, PNorm = 183.9191, GNorm = 0.0896, lr_0 = 1.3081e-04
Loss = 1.4265e-03, PNorm = 183.9221, GNorm = 0.0743, lr_0 = 1.3072e-04
Loss = 2.6552e-03, PNorm = 183.9252, GNorm = 0.0769, lr_0 = 1.3063e-04
Loss = 2.5688e-03, PNorm = 183.9275, GNorm = 0.1721, lr_0 = 1.3054e-04
Loss = 4.4275e-03, PNorm = 183.9308, GNorm = 0.0784, lr_0 = 1.3045e-04
Loss = 4.2300e-03, PNorm = 183.9326, GNorm = 0.1027, lr_0 = 1.3036e-04
Loss = 5.6308e-03, PNorm = 183.9354, GNorm = 0.1395, lr_0 = 1.3027e-04
Loss = 3.4350e-03, PNorm = 183.9382, GNorm = 0.0941, lr_0 = 1.3018e-04
Loss = 3.1977e-03, PNorm = 183.9414, GNorm = 0.1212, lr_0 = 1.3009e-04
Loss = 1.4812e-03, PNorm = 183.9441, GNorm = 0.0927, lr_0 = 1.3000e-04
Loss = 4.3409e-03, PNorm = 183.9465, GNorm = 0.1610, lr_0 = 1.2992e-04
Loss = 4.1316e-03, PNorm = 183.9489, GNorm = 0.1177, lr_0 = 1.2983e-04
Loss = 1.3481e-03, PNorm = 183.9513, GNorm = 0.0518, lr_0 = 1.2974e-04
Loss = 2.3143e-03, PNorm = 183.9539, GNorm = 0.0724, lr_0 = 1.2965e-04
Loss = 2.7180e-03, PNorm = 183.9559, GNorm = 0.2429, lr_0 = 1.2956e-04
Loss = 5.9508e-03, PNorm = 183.9570, GNorm = 0.0508, lr_0 = 1.2947e-04
Loss = 2.3814e-03, PNorm = 183.9583, GNorm = 0.1933, lr_0 = 1.2938e-04
Loss = 1.9495e-03, PNorm = 183.9613, GNorm = 0.0599, lr_0 = 1.2929e-04
Loss = 2.5917e-03, PNorm = 183.9641, GNorm = 0.1921, lr_0 = 1.2921e-04
Loss = 2.0203e-03, PNorm = 183.9673, GNorm = 0.1009, lr_0 = 1.2912e-04
Loss = 3.0936e-03, PNorm = 183.9693, GNorm = 0.0853, lr_0 = 1.2903e-04
Loss = 1.6365e-03, PNorm = 183.9714, GNorm = 0.0539, lr_0 = 1.2894e-04
Loss = 1.0820e-02, PNorm = 183.9721, GNorm = 0.6189, lr_0 = 1.2885e-04
Loss = 1.6560e-03, PNorm = 183.9731, GNorm = 0.0963, lr_0 = 1.2876e-04
Loss = 2.0075e-03, PNorm = 183.9750, GNorm = 0.1009, lr_0 = 1.2867e-04
Loss = 3.9083e-03, PNorm = 183.9773, GNorm = 0.1646, lr_0 = 1.2859e-04
Loss = 1.9281e-03, PNorm = 183.9788, GNorm = 0.0667, lr_0 = 1.2850e-04
Loss = 1.4686e-03, PNorm = 183.9812, GNorm = 0.0637, lr_0 = 1.2841e-04
Loss = 2.5766e-03, PNorm = 183.9824, GNorm = 0.0853, lr_0 = 1.2832e-04
Loss = 4.0043e-03, PNorm = 183.9826, GNorm = 0.1023, lr_0 = 1.2823e-04
Loss = 1.6748e-03, PNorm = 183.9827, GNorm = 0.0749, lr_0 = 1.2815e-04
Loss = 3.0653e-03, PNorm = 183.9842, GNorm = 0.0912, lr_0 = 1.2806e-04
Loss = 3.3192e-03, PNorm = 183.9861, GNorm = 0.0758, lr_0 = 1.2797e-04
Validation mae = 0.120618
Epoch 27
Loss = 1.8801e-03, PNorm = 183.9878, GNorm = 0.1614, lr_0 = 1.2788e-04
Loss = 1.5458e-03, PNorm = 183.9894, GNorm = 0.0521, lr_0 = 1.2780e-04
Loss = 1.3952e-03, PNorm = 183.9904, GNorm = 0.2081, lr_0 = 1.2771e-04
Loss = 4.1405e-03, PNorm = 183.9915, GNorm = 0.1359, lr_0 = 1.2762e-04
Loss = 7.4314e-03, PNorm = 183.9929, GNorm = 0.0526, lr_0 = 1.2753e-04
Loss = 1.7013e-03, PNorm = 183.9940, GNorm = 0.0945, lr_0 = 1.2745e-04
Loss = 2.6510e-03, PNorm = 183.9953, GNorm = 0.0512, lr_0 = 1.2736e-04
Loss = 2.3757e-03, PNorm = 183.9975, GNorm = 0.0995, lr_0 = 1.2727e-04
Loss = 2.9059e-03, PNorm = 183.9990, GNorm = 0.0654, lr_0 = 1.2718e-04
Loss = 2.7963e-03, PNorm = 184.0009, GNorm = 0.1543, lr_0 = 1.2710e-04
Loss = 3.6334e-03, PNorm = 184.0018, GNorm = 0.1063, lr_0 = 1.2701e-04
Loss = 1.5916e-03, PNorm = 184.0027, GNorm = 0.1313, lr_0 = 1.2692e-04
Loss = 1.2385e-03, PNorm = 184.0034, GNorm = 0.0467, lr_0 = 1.2684e-04
Loss = 2.6249e-03, PNorm = 184.0058, GNorm = 0.0532, lr_0 = 1.2675e-04
Loss = 1.6753e-03, PNorm = 184.0071, GNorm = 0.1335, lr_0 = 1.2666e-04
Loss = 4.7080e-03, PNorm = 184.0069, GNorm = 0.1195, lr_0 = 1.2658e-04
Loss = 3.3837e-03, PNorm = 184.0099, GNorm = 0.0541, lr_0 = 1.2649e-04
Loss = 2.0187e-03, PNorm = 184.0124, GNorm = 0.1335, lr_0 = 1.2640e-04
Loss = 4.5375e-03, PNorm = 184.0152, GNorm = 0.0738, lr_0 = 1.2632e-04
Loss = 4.9364e-03, PNorm = 184.0163, GNorm = 0.0859, lr_0 = 1.2623e-04
Loss = 7.5002e-03, PNorm = 184.0175, GNorm = 0.3058, lr_0 = 1.2614e-04
Loss = 5.2860e-03, PNorm = 184.0192, GNorm = 0.1332, lr_0 = 1.2606e-04
Loss = 2.7481e-03, PNorm = 184.0211, GNorm = 0.0601, lr_0 = 1.2597e-04
Loss = 2.7910e-03, PNorm = 184.0245, GNorm = 0.0611, lr_0 = 1.2588e-04
Loss = 3.8095e-03, PNorm = 184.0264, GNorm = 0.1554, lr_0 = 1.2580e-04
Loss = 2.1442e-03, PNorm = 184.0263, GNorm = 0.1096, lr_0 = 1.2571e-04
Loss = 2.9021e-03, PNorm = 184.0258, GNorm = 0.1104, lr_0 = 1.2563e-04
Loss = 2.2036e-03, PNorm = 184.0264, GNorm = 0.1013, lr_0 = 1.2554e-04
Loss = 9.5093e-03, PNorm = 184.0269, GNorm = 0.7348, lr_0 = 1.2545e-04
Loss = 2.4322e-03, PNorm = 184.0277, GNorm = 0.1001, lr_0 = 1.2537e-04
Loss = 6.0088e-03, PNorm = 184.0265, GNorm = 0.1948, lr_0 = 1.2528e-04
Loss = 7.2296e-03, PNorm = 184.0292, GNorm = 0.1880, lr_0 = 1.2520e-04
Loss = 2.6427e-03, PNorm = 184.0310, GNorm = 0.1371, lr_0 = 1.2511e-04
Loss = 2.3667e-03, PNorm = 184.0335, GNorm = 0.0508, lr_0 = 1.2502e-04
Loss = 3.6550e-03, PNorm = 184.0369, GNorm = 0.1477, lr_0 = 1.2494e-04
Loss = 1.5529e-03, PNorm = 184.0389, GNorm = 0.0617, lr_0 = 1.2485e-04
Loss = 1.4853e-03, PNorm = 184.0412, GNorm = 0.0545, lr_0 = 1.2477e-04
Loss = 3.0950e-03, PNorm = 184.0425, GNorm = 0.0690, lr_0 = 1.2468e-04
Loss = 3.7263e-03, PNorm = 184.0446, GNorm = 0.0478, lr_0 = 1.2460e-04
Loss = 3.3845e-03, PNorm = 184.0465, GNorm = 0.0459, lr_0 = 1.2451e-04
Loss = 1.6288e-03, PNorm = 184.0479, GNorm = 0.1331, lr_0 = 1.2443e-04
Loss = 3.1144e-03, PNorm = 184.0494, GNorm = 0.2210, lr_0 = 1.2434e-04
Loss = 2.1558e-03, PNorm = 184.0508, GNorm = 0.0464, lr_0 = 1.2426e-04
Loss = 3.1237e-03, PNorm = 184.0528, GNorm = 0.0743, lr_0 = 1.2417e-04
Loss = 1.1617e-03, PNorm = 184.0546, GNorm = 0.0720, lr_0 = 1.2409e-04
Loss = 2.2069e-03, PNorm = 184.0557, GNorm = 0.1851, lr_0 = 1.2400e-04
Loss = 3.2836e-03, PNorm = 184.0565, GNorm = 0.0803, lr_0 = 1.2392e-04
Loss = 1.2176e-03, PNorm = 184.0588, GNorm = 0.0917, lr_0 = 1.2383e-04
Loss = 2.1835e-03, PNorm = 184.0611, GNorm = 0.0773, lr_0 = 1.2375e-04
Loss = 4.9398e-03, PNorm = 184.0617, GNorm = 0.3023, lr_0 = 1.2366e-04
Loss = 1.3024e-03, PNorm = 184.0621, GNorm = 0.0809, lr_0 = 1.2358e-04
Loss = 3.8512e-03, PNorm = 184.0649, GNorm = 0.1564, lr_0 = 1.2349e-04
Loss = 4.7414e-03, PNorm = 184.0659, GNorm = 0.1405, lr_0 = 1.2341e-04
Loss = 1.2729e-02, PNorm = 184.0698, GNorm = 0.1486, lr_0 = 1.2332e-04
Loss = 1.9089e-03, PNorm = 184.0711, GNorm = 0.0801, lr_0 = 1.2324e-04
Loss = 2.2264e-03, PNorm = 184.0719, GNorm = 0.0821, lr_0 = 1.2315e-04
Loss = 1.3763e-03, PNorm = 184.0734, GNorm = 0.1167, lr_0 = 1.2307e-04
Loss = 1.6613e-03, PNorm = 184.0748, GNorm = 0.1164, lr_0 = 1.2298e-04
Loss = 1.1992e-03, PNorm = 184.0763, GNorm = 0.0833, lr_0 = 1.2290e-04
Loss = 2.7648e-03, PNorm = 184.0777, GNorm = 0.0803, lr_0 = 1.2282e-04
Loss = 1.1835e-03, PNorm = 184.0782, GNorm = 0.1146, lr_0 = 1.2273e-04
Loss = 1.7425e-03, PNorm = 184.0784, GNorm = 0.0597, lr_0 = 1.2265e-04
Loss = 6.9506e-03, PNorm = 184.0794, GNorm = 0.1226, lr_0 = 1.2256e-04
Loss = 3.3278e-03, PNorm = 184.0808, GNorm = 0.2367, lr_0 = 1.2248e-04
Loss = 1.6793e-03, PNorm = 184.0817, GNorm = 0.1184, lr_0 = 1.2240e-04
Loss = 9.9645e-04, PNorm = 184.0832, GNorm = 0.0781, lr_0 = 1.2231e-04
Loss = 4.0287e-03, PNorm = 184.0853, GNorm = 0.2384, lr_0 = 1.2223e-04
Loss = 4.7773e-03, PNorm = 184.0861, GNorm = 0.1319, lr_0 = 1.2214e-04
Loss = 1.5928e-03, PNorm = 184.0870, GNorm = 0.2072, lr_0 = 1.2206e-04
Loss = 1.1423e-03, PNorm = 184.0882, GNorm = 0.1513, lr_0 = 1.2198e-04
Loss = 3.0229e-03, PNorm = 184.0900, GNorm = 0.1030, lr_0 = 1.2189e-04
Loss = 3.4805e-03, PNorm = 184.0922, GNorm = 0.0902, lr_0 = 1.2181e-04
Loss = 2.3076e-03, PNorm = 184.0940, GNorm = 0.1651, lr_0 = 1.2173e-04
Loss = 1.3229e-03, PNorm = 184.0954, GNorm = 0.0971, lr_0 = 1.2164e-04
Loss = 2.1494e-03, PNorm = 184.0972, GNorm = 0.0984, lr_0 = 1.2156e-04
Loss = 2.8457e-03, PNorm = 184.0999, GNorm = 0.0807, lr_0 = 1.2148e-04
Loss = 2.5428e-03, PNorm = 184.1023, GNorm = 0.0621, lr_0 = 1.2139e-04
Loss = 8.8980e-03, PNorm = 184.1036, GNorm = 0.1606, lr_0 = 1.2131e-04
Loss = 1.4897e-02, PNorm = 184.1044, GNorm = 0.3433, lr_0 = 1.2123e-04
Loss = 1.1823e-03, PNorm = 184.1078, GNorm = 0.0866, lr_0 = 1.2114e-04
Loss = 2.8147e-03, PNorm = 184.1102, GNorm = 0.3468, lr_0 = 1.2106e-04
Loss = 2.3198e-03, PNorm = 184.1124, GNorm = 0.0657, lr_0 = 1.2098e-04
Loss = 2.8513e-03, PNorm = 184.1145, GNorm = 0.0436, lr_0 = 1.2090e-04
Loss = 2.5383e-03, PNorm = 184.1153, GNorm = 0.0964, lr_0 = 1.2081e-04
Loss = 2.3989e-03, PNorm = 184.1163, GNorm = 0.0403, lr_0 = 1.2073e-04
Loss = 1.9860e-03, PNorm = 184.1168, GNorm = 0.0823, lr_0 = 1.2065e-04
Loss = 1.5395e-03, PNorm = 184.1184, GNorm = 0.0839, lr_0 = 1.2056e-04
Loss = 5.7520e-03, PNorm = 184.1199, GNorm = 0.1079, lr_0 = 1.2048e-04
Loss = 1.5997e-03, PNorm = 184.1212, GNorm = 0.1354, lr_0 = 1.2040e-04
Loss = 2.1792e-03, PNorm = 184.1228, GNorm = 0.1043, lr_0 = 1.2032e-04
Loss = 7.7901e-03, PNorm = 184.1236, GNorm = 0.1226, lr_0 = 1.2023e-04
Loss = 2.4417e-03, PNorm = 184.1251, GNorm = 0.1697, lr_0 = 1.2015e-04
Loss = 1.6783e-03, PNorm = 184.1266, GNorm = 0.1092, lr_0 = 1.2007e-04
Loss = 4.4662e-03, PNorm = 184.1280, GNorm = 0.3804, lr_0 = 1.1999e-04
Loss = 5.6597e-03, PNorm = 184.1313, GNorm = 0.5525, lr_0 = 1.1991e-04
Loss = 2.1959e-03, PNorm = 184.1339, GNorm = 0.0734, lr_0 = 1.1982e-04
Loss = 1.8595e-03, PNorm = 184.1355, GNorm = 0.0966, lr_0 = 1.1974e-04
Loss = 3.1461e-03, PNorm = 184.1367, GNorm = 0.1485, lr_0 = 1.1966e-04
Loss = 1.7188e-03, PNorm = 184.1389, GNorm = 0.1019, lr_0 = 1.1958e-04
Loss = 1.1691e-03, PNorm = 184.1413, GNorm = 0.0481, lr_0 = 1.1950e-04
Loss = 2.1995e-03, PNorm = 184.1444, GNorm = 0.0964, lr_0 = 1.1941e-04
Loss = 1.7051e-03, PNorm = 184.1457, GNorm = 0.0856, lr_0 = 1.1933e-04
Loss = 2.4031e-03, PNorm = 184.1472, GNorm = 0.0343, lr_0 = 1.1925e-04
Loss = 9.9560e-04, PNorm = 184.1494, GNorm = 0.0576, lr_0 = 1.1917e-04
Loss = 1.4393e-03, PNorm = 184.1525, GNorm = 0.1526, lr_0 = 1.1909e-04
Loss = 3.3191e-03, PNorm = 184.1542, GNorm = 0.0530, lr_0 = 1.1901e-04
Loss = 2.9987e-03, PNorm = 184.1547, GNorm = 0.0322, lr_0 = 1.1892e-04
Loss = 4.5004e-03, PNorm = 184.1561, GNorm = 0.3523, lr_0 = 1.1884e-04
Loss = 1.7443e-03, PNorm = 184.1571, GNorm = 0.0600, lr_0 = 1.1876e-04
Loss = 1.0044e-03, PNorm = 184.1575, GNorm = 0.0656, lr_0 = 1.1868e-04
Loss = 5.9003e-03, PNorm = 184.1592, GNorm = 0.1118, lr_0 = 1.1860e-04
Loss = 1.1863e-03, PNorm = 184.1613, GNorm = 0.1140, lr_0 = 1.1852e-04
Loss = 2.2186e-03, PNorm = 184.1635, GNorm = 0.0786, lr_0 = 1.1844e-04
Loss = 5.2600e-03, PNorm = 184.1645, GNorm = 0.0773, lr_0 = 1.1835e-04
Loss = 6.0786e-03, PNorm = 184.1656, GNorm = 0.1561, lr_0 = 1.1827e-04
Loss = 6.9527e-03, PNorm = 184.1664, GNorm = 0.0618, lr_0 = 1.1819e-04
Loss = 7.7015e-03, PNorm = 184.1685, GNorm = 0.0467, lr_0 = 1.1811e-04
Loss = 2.0878e-03, PNorm = 184.1712, GNorm = 0.0713, lr_0 = 1.1803e-04
Loss = 5.2492e-03, PNorm = 184.1730, GNorm = 1.7019, lr_0 = 1.1795e-04
Loss = 2.8619e-03, PNorm = 184.1734, GNorm = 0.1238, lr_0 = 1.1787e-04
Validation mae = 0.120751
Epoch 28
Loss = 2.6870e-03, PNorm = 184.1756, GNorm = 0.1016, lr_0 = 1.1779e-04
Loss = 5.0702e-03, PNorm = 184.1766, GNorm = 0.0931, lr_0 = 1.1771e-04
Loss = 1.6515e-03, PNorm = 184.1769, GNorm = 0.0427, lr_0 = 1.1763e-04
Loss = 1.2309e-03, PNorm = 184.1777, GNorm = 0.1365, lr_0 = 1.1755e-04
Loss = 2.1438e-03, PNorm = 184.1789, GNorm = 0.0938, lr_0 = 1.1747e-04
Loss = 2.6549e-03, PNorm = 184.1800, GNorm = 0.0800, lr_0 = 1.1739e-04
Loss = 3.4661e-03, PNorm = 184.1812, GNorm = 0.0722, lr_0 = 1.1730e-04
Loss = 2.1232e-03, PNorm = 184.1833, GNorm = 0.0399, lr_0 = 1.1722e-04
Loss = 2.6658e-03, PNorm = 184.1851, GNorm = 0.0864, lr_0 = 1.1714e-04
Loss = 1.2936e-03, PNorm = 184.1868, GNorm = 0.1500, lr_0 = 1.1706e-04
Loss = 6.0639e-03, PNorm = 184.1875, GNorm = 0.0882, lr_0 = 1.1698e-04
Loss = 1.0570e-03, PNorm = 184.1881, GNorm = 0.0425, lr_0 = 1.1690e-04
Loss = 4.4321e-03, PNorm = 184.1886, GNorm = 0.2364, lr_0 = 1.1682e-04
Loss = 2.2930e-03, PNorm = 184.1899, GNorm = 0.1011, lr_0 = 1.1674e-04
Loss = 2.0988e-03, PNorm = 184.1909, GNorm = 0.0839, lr_0 = 1.1666e-04
Loss = 2.4174e-03, PNorm = 184.1919, GNorm = 0.0856, lr_0 = 1.1658e-04
Loss = 1.4019e-03, PNorm = 184.1932, GNorm = 0.0720, lr_0 = 1.1650e-04
Loss = 1.0474e-03, PNorm = 184.1941, GNorm = 0.0593, lr_0 = 1.1642e-04
Loss = 2.0711e-03, PNorm = 184.1946, GNorm = 0.1104, lr_0 = 1.1634e-04
Loss = 3.6471e-03, PNorm = 184.1952, GNorm = 0.2206, lr_0 = 1.1626e-04
Loss = 1.5022e-03, PNorm = 184.1959, GNorm = 0.0876, lr_0 = 1.1618e-04
Loss = 2.4614e-03, PNorm = 184.1974, GNorm = 0.0629, lr_0 = 1.1611e-04
Loss = 2.1014e-03, PNorm = 184.1972, GNorm = 0.1601, lr_0 = 1.1603e-04
Loss = 1.2288e-03, PNorm = 184.1977, GNorm = 0.1223, lr_0 = 1.1595e-04
Loss = 3.9910e-03, PNorm = 184.1998, GNorm = 0.1665, lr_0 = 1.1587e-04
Loss = 1.6357e-03, PNorm = 184.2000, GNorm = 0.1438, lr_0 = 1.1579e-04
Loss = 1.5058e-03, PNorm = 184.2011, GNorm = 0.0582, lr_0 = 1.1571e-04
Loss = 9.3323e-04, PNorm = 184.2033, GNorm = 0.0667, lr_0 = 1.1563e-04
Loss = 2.6263e-03, PNorm = 184.2056, GNorm = 0.2799, lr_0 = 1.1555e-04
Loss = 1.1380e-03, PNorm = 184.2085, GNorm = 0.0541, lr_0 = 1.1547e-04
Loss = 5.1750e-03, PNorm = 184.2109, GNorm = 0.0760, lr_0 = 1.1539e-04
Loss = 2.1670e-03, PNorm = 184.2135, GNorm = 0.0579, lr_0 = 1.1531e-04
Loss = 4.7167e-03, PNorm = 184.2155, GNorm = 0.1146, lr_0 = 1.1523e-04
Loss = 1.6169e-03, PNorm = 184.2160, GNorm = 0.1526, lr_0 = 1.1515e-04
Loss = 2.1848e-03, PNorm = 184.2172, GNorm = 0.0478, lr_0 = 1.1508e-04
Loss = 3.7476e-03, PNorm = 184.2188, GNorm = 0.1814, lr_0 = 1.1500e-04
Loss = 1.6201e-03, PNorm = 184.2204, GNorm = 0.1940, lr_0 = 1.1492e-04
Loss = 9.9985e-04, PNorm = 184.2220, GNorm = 0.0874, lr_0 = 1.1484e-04
Loss = 1.8740e-03, PNorm = 184.2232, GNorm = 0.0807, lr_0 = 1.1476e-04
Loss = 1.0439e-03, PNorm = 184.2241, GNorm = 0.0485, lr_0 = 1.1468e-04
Loss = 1.6108e-03, PNorm = 184.2251, GNorm = 0.0689, lr_0 = 1.1460e-04
Loss = 1.2372e-03, PNorm = 184.2265, GNorm = 0.0523, lr_0 = 1.1452e-04
Loss = 2.4793e-03, PNorm = 184.2272, GNorm = 0.0433, lr_0 = 1.1445e-04
Loss = 1.3612e-03, PNorm = 184.2282, GNorm = 0.0539, lr_0 = 1.1437e-04
Loss = 1.8460e-03, PNorm = 184.2295, GNorm = 0.1261, lr_0 = 1.1429e-04
Loss = 1.6641e-03, PNorm = 184.2303, GNorm = 0.0473, lr_0 = 1.1421e-04
Loss = 2.6193e-03, PNorm = 184.2302, GNorm = 0.0965, lr_0 = 1.1413e-04
Loss = 4.0388e-03, PNorm = 184.2315, GNorm = 0.0404, lr_0 = 1.1405e-04
Loss = 3.1546e-03, PNorm = 184.2331, GNorm = 0.1416, lr_0 = 1.1398e-04
Loss = 1.3641e-03, PNorm = 184.2349, GNorm = 0.1077, lr_0 = 1.1390e-04
Loss = 5.0532e-03, PNorm = 184.2368, GNorm = 0.1316, lr_0 = 1.1382e-04
Loss = 2.1654e-03, PNorm = 184.2382, GNorm = 0.0940, lr_0 = 1.1374e-04
Loss = 9.9515e-04, PNorm = 184.2399, GNorm = 0.1073, lr_0 = 1.1366e-04
Loss = 4.0740e-03, PNorm = 184.2416, GNorm = 0.1802, lr_0 = 1.1359e-04
Loss = 6.1411e-03, PNorm = 184.2434, GNorm = 0.0513, lr_0 = 1.1351e-04
Loss = 1.9084e-03, PNorm = 184.2452, GNorm = 0.0474, lr_0 = 1.1343e-04
Loss = 2.7562e-03, PNorm = 184.2469, GNorm = 0.1864, lr_0 = 1.1335e-04
Loss = 1.7136e-03, PNorm = 184.2486, GNorm = 0.0567, lr_0 = 1.1328e-04
Loss = 1.8599e-03, PNorm = 184.2503, GNorm = 0.0919, lr_0 = 1.1320e-04
Loss = 4.3858e-03, PNorm = 184.2507, GNorm = 0.1035, lr_0 = 1.1312e-04
Loss = 1.6477e-03, PNorm = 184.2522, GNorm = 0.0729, lr_0 = 1.1304e-04
Loss = 1.4751e-03, PNorm = 184.2544, GNorm = 0.1572, lr_0 = 1.1297e-04
Loss = 1.3067e-03, PNorm = 184.2557, GNorm = 0.1866, lr_0 = 1.1289e-04
Loss = 5.2598e-03, PNorm = 184.2567, GNorm = 0.1408, lr_0 = 1.1281e-04
Loss = 5.2796e-03, PNorm = 184.2579, GNorm = 0.4354, lr_0 = 1.1273e-04
Loss = 1.2976e-03, PNorm = 184.2589, GNorm = 0.1179, lr_0 = 1.1266e-04
Loss = 1.9167e-03, PNorm = 184.2599, GNorm = 0.1282, lr_0 = 1.1258e-04
Loss = 3.5127e-03, PNorm = 184.2612, GNorm = 0.1571, lr_0 = 1.1250e-04
Loss = 5.2912e-03, PNorm = 184.2634, GNorm = 0.0812, lr_0 = 1.1243e-04
Loss = 2.2249e-03, PNorm = 184.2641, GNorm = 0.0532, lr_0 = 1.1235e-04
Loss = 6.8580e-03, PNorm = 184.2648, GNorm = 0.1689, lr_0 = 1.1227e-04
Loss = 2.1136e-03, PNorm = 184.2675, GNorm = 0.0479, lr_0 = 1.1219e-04
Loss = 4.7926e-03, PNorm = 184.2700, GNorm = 0.0771, lr_0 = 1.1212e-04
Loss = 5.4912e-03, PNorm = 184.2718, GNorm = 0.1547, lr_0 = 1.1204e-04
Loss = 1.4763e-03, PNorm = 184.2724, GNorm = 0.0705, lr_0 = 1.1196e-04
Loss = 1.6733e-03, PNorm = 184.2726, GNorm = 0.0837, lr_0 = 1.1189e-04
Loss = 1.7935e-03, PNorm = 184.2739, GNorm = 0.0925, lr_0 = 1.1181e-04
Loss = 5.5344e-03, PNorm = 184.2758, GNorm = 0.1307, lr_0 = 1.1173e-04
Loss = 3.4935e-03, PNorm = 184.2766, GNorm = 0.0673, lr_0 = 1.1166e-04
Loss = 9.0440e-03, PNorm = 184.2783, GNorm = 0.1355, lr_0 = 1.1158e-04
Loss = 1.8347e-03, PNorm = 184.2793, GNorm = 0.1178, lr_0 = 1.1150e-04
Loss = 2.4718e-03, PNorm = 184.2816, GNorm = 0.0539, lr_0 = 1.1143e-04
Loss = 8.0894e-03, PNorm = 184.2824, GNorm = 0.3266, lr_0 = 1.1135e-04
Loss = 2.3116e-03, PNorm = 184.2851, GNorm = 0.0992, lr_0 = 1.1128e-04
Loss = 2.0030e-03, PNorm = 184.2861, GNorm = 0.0645, lr_0 = 1.1120e-04
Loss = 5.1729e-03, PNorm = 184.2863, GNorm = 0.1794, lr_0 = 1.1112e-04
Loss = 4.4053e-03, PNorm = 184.2870, GNorm = 0.0516, lr_0 = 1.1105e-04
Loss = 7.0320e-03, PNorm = 184.2886, GNorm = 0.1505, lr_0 = 1.1097e-04
Loss = 1.5927e-03, PNorm = 184.2899, GNorm = 0.0994, lr_0 = 1.1089e-04
Loss = 1.1179e-03, PNorm = 184.2916, GNorm = 0.0878, lr_0 = 1.1082e-04
Loss = 1.0030e-03, PNorm = 184.2937, GNorm = 0.1254, lr_0 = 1.1074e-04
Loss = 4.2821e-03, PNorm = 184.2943, GNorm = 0.0495, lr_0 = 1.1067e-04
Loss = 5.7282e-03, PNorm = 184.2955, GNorm = 0.1188, lr_0 = 1.1059e-04
Loss = 1.0885e-02, PNorm = 184.2964, GNorm = 0.3891, lr_0 = 1.1052e-04
Loss = 2.7000e-03, PNorm = 184.2981, GNorm = 0.1875, lr_0 = 1.1044e-04
Loss = 1.1923e-03, PNorm = 184.3016, GNorm = 0.0591, lr_0 = 1.1036e-04
Loss = 6.6044e-03, PNorm = 184.3025, GNorm = 0.1401, lr_0 = 1.1029e-04
Loss = 1.6879e-03, PNorm = 184.3040, GNorm = 0.1162, lr_0 = 1.1021e-04
Loss = 3.6757e-03, PNorm = 184.3058, GNorm = 0.1006, lr_0 = 1.1014e-04
Loss = 2.1354e-03, PNorm = 184.3087, GNorm = 0.0719, lr_0 = 1.1006e-04
Loss = 2.7680e-03, PNorm = 184.3105, GNorm = 0.0330, lr_0 = 1.0999e-04
Loss = 2.1859e-03, PNorm = 184.3121, GNorm = 0.0661, lr_0 = 1.0991e-04
Loss = 2.1012e-03, PNorm = 184.3148, GNorm = 0.7054, lr_0 = 1.0984e-04
Loss = 1.5878e-03, PNorm = 184.3173, GNorm = 0.0807, lr_0 = 1.0976e-04
Loss = 3.8252e-03, PNorm = 184.3202, GNorm = 0.0571, lr_0 = 1.0969e-04
Loss = 1.7364e-02, PNorm = 184.3237, GNorm = 0.1508, lr_0 = 1.0961e-04
Loss = 3.6290e-03, PNorm = 184.3243, GNorm = 0.1685, lr_0 = 1.0954e-04
Loss = 1.3375e-03, PNorm = 184.3252, GNorm = 0.1198, lr_0 = 1.0946e-04
Loss = 3.0946e-03, PNorm = 184.3265, GNorm = 0.1411, lr_0 = 1.0939e-04
Loss = 1.3920e-03, PNorm = 184.3276, GNorm = 0.0374, lr_0 = 1.0931e-04
Loss = 2.1893e-03, PNorm = 184.3278, GNorm = 0.1131, lr_0 = 1.0924e-04
Loss = 2.5784e-03, PNorm = 184.3283, GNorm = 0.0871, lr_0 = 1.0916e-04
Loss = 9.5439e-04, PNorm = 184.3292, GNorm = 0.0968, lr_0 = 1.0909e-04
Loss = 7.7828e-03, PNorm = 184.3294, GNorm = 0.0699, lr_0 = 1.0901e-04
Loss = 1.4815e-03, PNorm = 184.3306, GNorm = 0.1132, lr_0 = 1.0894e-04
Loss = 6.5462e-03, PNorm = 184.3323, GNorm = 0.0649, lr_0 = 1.0886e-04
Loss = 2.9353e-03, PNorm = 184.3345, GNorm = 0.1056, lr_0 = 1.0879e-04
Loss = 1.1014e-03, PNorm = 184.3357, GNorm = 0.1193, lr_0 = 1.0871e-04
Loss = 3.3149e-03, PNorm = 184.3361, GNorm = 0.0719, lr_0 = 1.0864e-04
Loss = 2.1014e-03, PNorm = 184.3388, GNorm = 0.1093, lr_0 = 1.0856e-04
Validation mae = 0.120676
Epoch 29
Loss = 1.9081e-03, PNorm = 184.3404, GNorm = 0.0878, lr_0 = 1.0849e-04
Loss = 1.5593e-03, PNorm = 184.3405, GNorm = 0.0639, lr_0 = 1.0841e-04
Loss = 1.8474e-03, PNorm = 184.3417, GNorm = 0.0748, lr_0 = 1.0834e-04
Loss = 1.3070e-03, PNorm = 184.3419, GNorm = 0.0575, lr_0 = 1.0827e-04
Loss = 4.2209e-03, PNorm = 184.3425, GNorm = 0.1267, lr_0 = 1.0819e-04
Loss = 3.2539e-03, PNorm = 184.3436, GNorm = 0.2537, lr_0 = 1.0812e-04
Loss = 9.8584e-04, PNorm = 184.3446, GNorm = 0.0829, lr_0 = 1.0804e-04
Loss = 4.4219e-03, PNorm = 184.3464, GNorm = 0.0497, lr_0 = 1.0797e-04
Loss = 4.5400e-03, PNorm = 184.3468, GNorm = 0.2207, lr_0 = 1.0790e-04
Loss = 4.0769e-03, PNorm = 184.3485, GNorm = 0.0737, lr_0 = 1.0782e-04
Loss = 7.9013e-03, PNorm = 184.3483, GNorm = 0.0706, lr_0 = 1.0775e-04
Loss = 2.1800e-03, PNorm = 184.3486, GNorm = 0.0853, lr_0 = 1.0767e-04
Loss = 1.7702e-03, PNorm = 184.3504, GNorm = 0.0701, lr_0 = 1.0760e-04
Loss = 3.0213e-03, PNorm = 184.3512, GNorm = 0.0860, lr_0 = 1.0753e-04
Loss = 1.1843e-03, PNorm = 184.3511, GNorm = 0.1091, lr_0 = 1.0745e-04
Loss = 2.6634e-03, PNorm = 184.3523, GNorm = 0.0529, lr_0 = 1.0738e-04
Loss = 3.0863e-03, PNorm = 184.3532, GNorm = 0.0750, lr_0 = 1.0731e-04
Loss = 4.5183e-03, PNorm = 184.3536, GNorm = 0.1980, lr_0 = 1.0723e-04
Loss = 2.3315e-03, PNorm = 184.3548, GNorm = 0.0694, lr_0 = 1.0716e-04
Loss = 2.1135e-03, PNorm = 184.3562, GNorm = 0.0505, lr_0 = 1.0709e-04
Loss = 2.4447e-03, PNorm = 184.3582, GNorm = 0.1957, lr_0 = 1.0701e-04
Loss = 5.9814e-03, PNorm = 184.3600, GNorm = 0.0522, lr_0 = 1.0694e-04
Loss = 1.0501e-03, PNorm = 184.3620, GNorm = 0.0547, lr_0 = 1.0687e-04
Loss = 4.1054e-03, PNorm = 184.3635, GNorm = 0.0821, lr_0 = 1.0679e-04
Loss = 9.2953e-04, PNorm = 184.3643, GNorm = 0.0879, lr_0 = 1.0672e-04
Loss = 1.5557e-03, PNorm = 184.3659, GNorm = 0.0543, lr_0 = 1.0665e-04
Loss = 3.2432e-03, PNorm = 184.3675, GNorm = 0.0877, lr_0 = 1.0657e-04
Loss = 2.5310e-03, PNorm = 184.3679, GNorm = 0.7319, lr_0 = 1.0650e-04
Loss = 4.6566e-03, PNorm = 184.3686, GNorm = 0.1340, lr_0 = 1.0643e-04
Loss = 1.2039e-03, PNorm = 184.3693, GNorm = 0.0604, lr_0 = 1.0635e-04
Loss = 9.5763e-04, PNorm = 184.3708, GNorm = 0.0728, lr_0 = 1.0628e-04
Loss = 3.3136e-03, PNorm = 184.3718, GNorm = 0.1036, lr_0 = 1.0621e-04
Loss = 1.9776e-03, PNorm = 184.3724, GNorm = 0.1152, lr_0 = 1.0614e-04
Loss = 7.4002e-03, PNorm = 184.3728, GNorm = 0.7198, lr_0 = 1.0606e-04
Loss = 2.1197e-03, PNorm = 184.3728, GNorm = 0.1544, lr_0 = 1.0599e-04
Loss = 3.1865e-03, PNorm = 184.3732, GNorm = 0.0622, lr_0 = 1.0592e-04
Loss = 2.1336e-03, PNorm = 184.3741, GNorm = 0.2804, lr_0 = 1.0585e-04
Loss = 1.4289e-03, PNorm = 184.3749, GNorm = 0.0591, lr_0 = 1.0577e-04
Loss = 1.1517e-03, PNorm = 184.3767, GNorm = 0.0639, lr_0 = 1.0570e-04
Loss = 1.3536e-03, PNorm = 184.3781, GNorm = 0.1193, lr_0 = 1.0563e-04
Loss = 1.4170e-03, PNorm = 184.3784, GNorm = 0.1633, lr_0 = 1.0556e-04
Loss = 2.6080e-03, PNorm = 184.3796, GNorm = 0.0552, lr_0 = 1.0548e-04
Loss = 9.8160e-04, PNorm = 184.3802, GNorm = 0.1338, lr_0 = 1.0541e-04
Loss = 1.7715e-03, PNorm = 184.3808, GNorm = 0.1115, lr_0 = 1.0534e-04
Loss = 1.0376e-03, PNorm = 184.3818, GNorm = 0.0913, lr_0 = 1.0527e-04
Loss = 3.5252e-03, PNorm = 184.3845, GNorm = 0.0832, lr_0 = 1.0519e-04
Loss = 1.6184e-03, PNorm = 184.3862, GNorm = 0.1125, lr_0 = 1.0512e-04
Loss = 9.7343e-03, PNorm = 184.3864, GNorm = 0.0582, lr_0 = 1.0505e-04
Loss = 3.0158e-03, PNorm = 184.3879, GNorm = 0.0692, lr_0 = 1.0498e-04
Loss = 1.6255e-03, PNorm = 184.3902, GNorm = 0.1130, lr_0 = 1.0491e-04
Loss = 2.5030e-03, PNorm = 184.3913, GNorm = 0.0963, lr_0 = 1.0483e-04
Loss = 3.7122e-03, PNorm = 184.3924, GNorm = 0.0620, lr_0 = 1.0476e-04
Loss = 2.1228e-03, PNorm = 184.3940, GNorm = 0.0592, lr_0 = 1.0469e-04
Loss = 3.4158e-03, PNorm = 184.3945, GNorm = 0.0669, lr_0 = 1.0462e-04
Loss = 1.5072e-03, PNorm = 184.3953, GNorm = 0.1686, lr_0 = 1.0455e-04
Loss = 3.2484e-03, PNorm = 184.3968, GNorm = 0.0519, lr_0 = 1.0448e-04
Loss = 2.5079e-03, PNorm = 184.3974, GNorm = 0.0560, lr_0 = 1.0440e-04
Loss = 9.0286e-04, PNorm = 184.3980, GNorm = 0.0801, lr_0 = 1.0433e-04
Loss = 2.6848e-03, PNorm = 184.3996, GNorm = 0.0801, lr_0 = 1.0426e-04
Loss = 1.0635e-03, PNorm = 184.4007, GNorm = 0.1134, lr_0 = 1.0419e-04
Loss = 1.8744e-03, PNorm = 184.4029, GNorm = 0.0893, lr_0 = 1.0412e-04
Loss = 1.4193e-03, PNorm = 184.4043, GNorm = 0.0833, lr_0 = 1.0405e-04
Loss = 2.5205e-03, PNorm = 184.4064, GNorm = 0.1384, lr_0 = 1.0398e-04
Loss = 3.6196e-03, PNorm = 184.4072, GNorm = 0.0558, lr_0 = 1.0391e-04
Loss = 1.8306e-03, PNorm = 184.4082, GNorm = 0.0674, lr_0 = 1.0383e-04
Loss = 4.9590e-03, PNorm = 184.4088, GNorm = 0.1487, lr_0 = 1.0376e-04
Loss = 1.2041e-03, PNorm = 184.4100, GNorm = 0.1316, lr_0 = 1.0369e-04
Loss = 1.3163e-03, PNorm = 184.4111, GNorm = 0.0495, lr_0 = 1.0362e-04
Loss = 8.7748e-04, PNorm = 184.4127, GNorm = 0.0396, lr_0 = 1.0355e-04
Loss = 1.0424e-03, PNorm = 184.4142, GNorm = 0.0805, lr_0 = 1.0348e-04
Loss = 8.8929e-03, PNorm = 184.4164, GNorm = 0.0668, lr_0 = 1.0341e-04
Loss = 2.2055e-03, PNorm = 184.4172, GNorm = 0.0348, lr_0 = 1.0334e-04
Loss = 1.9196e-03, PNorm = 184.4172, GNorm = 0.1340, lr_0 = 1.0327e-04
Loss = 1.9660e-03, PNorm = 184.4181, GNorm = 0.0512, lr_0 = 1.0320e-04
Loss = 4.5037e-03, PNorm = 184.4189, GNorm = 0.1160, lr_0 = 1.0312e-04
Loss = 6.5736e-03, PNorm = 184.4193, GNorm = 0.1272, lr_0 = 1.0305e-04
Loss = 1.8889e-03, PNorm = 184.4196, GNorm = 0.0972, lr_0 = 1.0298e-04
Loss = 1.1033e-03, PNorm = 184.4204, GNorm = 0.0939, lr_0 = 1.0291e-04
Loss = 5.9618e-03, PNorm = 184.4221, GNorm = 0.0833, lr_0 = 1.0284e-04
Loss = 9.1294e-04, PNorm = 184.4223, GNorm = 0.0945, lr_0 = 1.0277e-04
Loss = 1.9336e-03, PNorm = 184.4231, GNorm = 0.0516, lr_0 = 1.0270e-04
Loss = 3.8252e-03, PNorm = 184.4242, GNorm = 0.1318, lr_0 = 1.0263e-04
Loss = 4.7644e-03, PNorm = 184.4272, GNorm = 0.4511, lr_0 = 1.0256e-04
Loss = 4.0165e-03, PNorm = 184.4294, GNorm = 0.0635, lr_0 = 1.0249e-04
Loss = 2.4437e-03, PNorm = 184.4304, GNorm = 0.1473, lr_0 = 1.0242e-04
Loss = 2.0571e-03, PNorm = 184.4303, GNorm = 0.0674, lr_0 = 1.0235e-04
Loss = 3.0082e-03, PNorm = 184.4299, GNorm = 0.0572, lr_0 = 1.0228e-04
Loss = 1.6651e-03, PNorm = 184.4308, GNorm = 0.0439, lr_0 = 1.0221e-04
Loss = 1.5539e-03, PNorm = 184.4318, GNorm = 0.0572, lr_0 = 1.0214e-04
Loss = 1.4367e-03, PNorm = 184.4323, GNorm = 0.0363, lr_0 = 1.0207e-04
Loss = 9.5492e-04, PNorm = 184.4323, GNorm = 0.0534, lr_0 = 1.0200e-04
Loss = 1.3652e-03, PNorm = 184.4331, GNorm = 0.0351, lr_0 = 1.0193e-04
Loss = 3.2010e-03, PNorm = 184.4334, GNorm = 0.0452, lr_0 = 1.0186e-04
Loss = 8.7365e-03, PNorm = 184.4327, GNorm = 0.1053, lr_0 = 1.0179e-04
Loss = 2.8449e-03, PNorm = 184.4337, GNorm = 0.0854, lr_0 = 1.0172e-04
Loss = 9.8509e-04, PNorm = 184.4346, GNorm = 0.0654, lr_0 = 1.0165e-04
Loss = 8.6440e-04, PNorm = 184.4356, GNorm = 0.0717, lr_0 = 1.0158e-04
Loss = 4.5761e-03, PNorm = 184.4369, GNorm = 0.3631, lr_0 = 1.0151e-04
Loss = 2.7038e-03, PNorm = 184.4380, GNorm = 0.0676, lr_0 = 1.0144e-04
Loss = 1.2512e-03, PNorm = 184.4387, GNorm = 0.1477, lr_0 = 1.0137e-04
Loss = 1.9630e-03, PNorm = 184.4390, GNorm = 0.0728, lr_0 = 1.0130e-04
Loss = 3.7821e-03, PNorm = 184.4396, GNorm = 0.0730, lr_0 = 1.0123e-04
Loss = 3.0156e-03, PNorm = 184.4406, GNorm = 0.4578, lr_0 = 1.0116e-04
Loss = 1.1648e-03, PNorm = 184.4418, GNorm = 0.0755, lr_0 = 1.0110e-04
Loss = 2.4276e-03, PNorm = 184.4429, GNorm = 0.0570, lr_0 = 1.0103e-04
Loss = 4.6056e-03, PNorm = 184.4436, GNorm = 0.1687, lr_0 = 1.0096e-04
Loss = 1.5449e-03, PNorm = 184.4448, GNorm = 0.0611, lr_0 = 1.0089e-04
Loss = 1.6750e-03, PNorm = 184.4474, GNorm = 0.1013, lr_0 = 1.0082e-04
Loss = 8.8858e-04, PNorm = 184.4495, GNorm = 0.0671, lr_0 = 1.0075e-04
Loss = 1.5131e-03, PNorm = 184.4514, GNorm = 0.1541, lr_0 = 1.0068e-04
Loss = 2.2466e-03, PNorm = 184.4528, GNorm = 0.1326, lr_0 = 1.0061e-04
Loss = 5.2620e-03, PNorm = 184.4537, GNorm = 0.0450, lr_0 = 1.0054e-04
Loss = 4.5100e-03, PNorm = 184.4546, GNorm = 0.3329, lr_0 = 1.0047e-04
Loss = 6.1947e-03, PNorm = 184.4574, GNorm = 0.0427, lr_0 = 1.0041e-04
Loss = 4.2014e-03, PNorm = 184.4589, GNorm = 0.0995, lr_0 = 1.0034e-04
Loss = 2.7801e-03, PNorm = 184.4608, GNorm = 0.1392, lr_0 = 1.0027e-04
Loss = 1.1298e-03, PNorm = 184.4626, GNorm = 0.0467, lr_0 = 1.0020e-04
Loss = 1.4106e-02, PNorm = 184.4638, GNorm = 2.8218, lr_0 = 1.0013e-04
Loss = 5.9441e-03, PNorm = 184.4651, GNorm = 0.4326, lr_0 = 1.0006e-04
Loss = 3.0103e-03, PNorm = 184.4653, GNorm = 0.0548, lr_0 = 1.0000e-04
Validation mae = 0.120540
Model 0 best validation mae = 0.120540 on epoch 29
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.119779
Ensemble test mae = 0.119779
Fold 5
Splitting data with seed 5
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN()
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=2048, out_features=2100, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=2100, out_features=2100, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.0, inplace=False)
    (7): Linear(in_features=2100, out_features=1, bias=True)
  )
)
Number of parameters = 8,717,101
Moving model to cuda
Epoch 0
Loss = 9.5045e-01, PNorm = 64.6280, GNorm = 2.3905, lr_0 = 1.0413e-04
Loss = 7.3381e-01, PNorm = 64.6394, GNorm = 1.6616, lr_0 = 1.0788e-04
Loss = 7.7293e-01, PNorm = 64.6497, GNorm = 2.9048, lr_0 = 1.1163e-04
Loss = 7.6385e-01, PNorm = 64.6591, GNorm = 2.2732, lr_0 = 1.1537e-04
Loss = 7.4389e-01, PNorm = 64.6677, GNorm = 1.6256, lr_0 = 1.1913e-04
Loss = 7.4898e-01, PNorm = 64.6775, GNorm = 2.6787, lr_0 = 1.2287e-04
Loss = 6.9391e-01, PNorm = 64.6878, GNorm = 2.5088, lr_0 = 1.2663e-04
Loss = 6.0536e-01, PNorm = 64.6966, GNorm = 1.8621, lr_0 = 1.3038e-04
Loss = 6.6698e-01, PNorm = 64.7057, GNorm = 2.5437, lr_0 = 1.3413e-04
Loss = 7.5390e-01, PNorm = 64.7174, GNorm = 2.5638, lr_0 = 1.3788e-04
Loss = 6.4460e-01, PNorm = 64.7267, GNorm = 3.4490, lr_0 = 1.4163e-04
Loss = 6.4738e-01, PNorm = 64.7360, GNorm = 2.4676, lr_0 = 1.4537e-04
Loss = 6.5050e-01, PNorm = 64.7471, GNorm = 2.7942, lr_0 = 1.4913e-04
Loss = 6.7006e-01, PNorm = 64.7581, GNorm = 1.7857, lr_0 = 1.5288e-04
Loss = 6.1755e-01, PNorm = 64.7718, GNorm = 3.0489, lr_0 = 1.5662e-04
Loss = 5.7235e-01, PNorm = 64.7842, GNorm = 2.2997, lr_0 = 1.6038e-04
Loss = 6.4420e-01, PNorm = 64.7958, GNorm = 2.7885, lr_0 = 1.6412e-04
Loss = 6.5652e-01, PNorm = 64.8074, GNorm = 2.5141, lr_0 = 1.6788e-04
Loss = 5.8372e-01, PNorm = 64.8215, GNorm = 2.4369, lr_0 = 1.7163e-04
Loss = 6.4762e-01, PNorm = 64.8348, GNorm = 3.5576, lr_0 = 1.7538e-04
Loss = 5.6115e-01, PNorm = 64.8476, GNorm = 2.1027, lr_0 = 1.7913e-04
Loss = 5.9631e-01, PNorm = 64.8599, GNorm = 1.6438, lr_0 = 1.8288e-04
Loss = 5.9224e-01, PNorm = 64.8745, GNorm = 2.0228, lr_0 = 1.8662e-04
Loss = 5.6896e-01, PNorm = 64.8884, GNorm = 1.9406, lr_0 = 1.9038e-04
Loss = 6.2473e-01, PNorm = 64.9020, GNorm = 2.0627, lr_0 = 1.9413e-04
Loss = 6.1325e-01, PNorm = 64.9169, GNorm = 3.4989, lr_0 = 1.9788e-04
Loss = 6.9339e-01, PNorm = 64.9334, GNorm = 2.5753, lr_0 = 2.0163e-04
Loss = 5.8556e-01, PNorm = 64.9538, GNorm = 1.9602, lr_0 = 2.0537e-04
Loss = 5.4501e-01, PNorm = 64.9694, GNorm = 1.9605, lr_0 = 2.0913e-04
Loss = 5.3580e-01, PNorm = 64.9844, GNorm = 2.2648, lr_0 = 2.1288e-04
Loss = 6.6813e-01, PNorm = 65.0013, GNorm = 2.0540, lr_0 = 2.1663e-04
Loss = 6.1239e-01, PNorm = 65.0210, GNorm = 2.3307, lr_0 = 2.2038e-04
Loss = 5.0801e-01, PNorm = 65.0424, GNorm = 2.0650, lr_0 = 2.2412e-04
Loss = 5.7837e-01, PNorm = 65.0600, GNorm = 1.7525, lr_0 = 2.2787e-04
Loss = 5.0984e-01, PNorm = 65.0795, GNorm = 1.8728, lr_0 = 2.3163e-04
Loss = 5.7010e-01, PNorm = 65.1020, GNorm = 1.9323, lr_0 = 2.3538e-04
Loss = 5.6380e-01, PNorm = 65.1220, GNorm = 1.9916, lr_0 = 2.3913e-04
Loss = 5.3323e-01, PNorm = 65.1406, GNorm = 1.9111, lr_0 = 2.4288e-04
Loss = 5.3296e-01, PNorm = 65.1627, GNorm = 1.7336, lr_0 = 2.4662e-04
Loss = 5.4209e-01, PNorm = 65.1831, GNorm = 1.5076, lr_0 = 2.5038e-04
Loss = 5.8356e-01, PNorm = 65.2054, GNorm = 1.6508, lr_0 = 2.5413e-04
Loss = 5.8950e-01, PNorm = 65.2322, GNorm = 2.3974, lr_0 = 2.5788e-04
Loss = 5.8116e-01, PNorm = 65.2542, GNorm = 1.6812, lr_0 = 2.6163e-04
Loss = 5.3622e-01, PNorm = 65.2790, GNorm = 1.2753, lr_0 = 2.6537e-04
Loss = 5.8141e-01, PNorm = 65.3058, GNorm = 2.3212, lr_0 = 2.6912e-04
Loss = 5.4708e-01, PNorm = 65.3322, GNorm = 1.8239, lr_0 = 2.7288e-04
Loss = 5.2996e-01, PNorm = 65.3536, GNorm = 1.4872, lr_0 = 2.7663e-04
Loss = 5.2489e-01, PNorm = 65.3801, GNorm = 1.5390, lr_0 = 2.8038e-04
Loss = 5.9002e-01, PNorm = 65.4056, GNorm = 2.2561, lr_0 = 2.8413e-04
Loss = 5.6577e-01, PNorm = 65.4329, GNorm = 1.7151, lr_0 = 2.8787e-04
Loss = 5.4108e-01, PNorm = 65.4615, GNorm = 1.5968, lr_0 = 2.9163e-04
Loss = 5.6885e-01, PNorm = 65.4920, GNorm = 2.0076, lr_0 = 2.9538e-04
Loss = 5.9960e-01, PNorm = 65.5204, GNorm = 1.7361, lr_0 = 2.9913e-04
Loss = 5.5084e-01, PNorm = 65.5495, GNorm = 2.0866, lr_0 = 3.0288e-04
Loss = 5.4466e-01, PNorm = 65.5811, GNorm = 1.7143, lr_0 = 3.0662e-04
Loss = 5.0646e-01, PNorm = 65.6080, GNorm = 1.3338, lr_0 = 3.1037e-04
Loss = 4.5834e-01, PNorm = 65.6410, GNorm = 1.7444, lr_0 = 3.1413e-04
Loss = 5.4784e-01, PNorm = 65.6696, GNorm = 1.8382, lr_0 = 3.1788e-04
Loss = 5.3655e-01, PNorm = 65.6981, GNorm = 1.4567, lr_0 = 3.2163e-04
Loss = 5.9684e-01, PNorm = 65.7336, GNorm = 2.5390, lr_0 = 3.2538e-04
Loss = 4.9851e-01, PNorm = 65.7664, GNorm = 1.2985, lr_0 = 3.2912e-04
Loss = 5.2217e-01, PNorm = 65.7942, GNorm = 1.4933, lr_0 = 3.3288e-04
Loss = 5.7839e-01, PNorm = 65.8253, GNorm = 1.3168, lr_0 = 3.3663e-04
Loss = 5.7606e-01, PNorm = 65.8569, GNorm = 1.8956, lr_0 = 3.4038e-04
Loss = 4.8232e-01, PNorm = 65.8883, GNorm = 1.0633, lr_0 = 3.4413e-04
Loss = 5.0297e-01, PNorm = 65.9211, GNorm = 1.2356, lr_0 = 3.4787e-04
Loss = 4.5422e-01, PNorm = 65.9550, GNorm = 1.6212, lr_0 = 3.5162e-04
Loss = 5.1584e-01, PNorm = 65.9875, GNorm = 1.3638, lr_0 = 3.5538e-04
Loss = 5.6826e-01, PNorm = 66.0230, GNorm = 1.3408, lr_0 = 3.5913e-04
Loss = 4.5260e-01, PNorm = 66.0618, GNorm = 1.2644, lr_0 = 3.6288e-04
Loss = 5.4296e-01, PNorm = 66.0981, GNorm = 1.2171, lr_0 = 3.6662e-04
Loss = 5.1257e-01, PNorm = 66.1342, GNorm = 1.2575, lr_0 = 3.7037e-04
Loss = 4.9185e-01, PNorm = 66.1783, GNorm = 1.2749, lr_0 = 3.7413e-04
Loss = 5.5637e-01, PNorm = 66.2122, GNorm = 1.8114, lr_0 = 3.7788e-04
Loss = 5.1992e-01, PNorm = 66.2563, GNorm = 1.9660, lr_0 = 3.8163e-04
Loss = 5.3998e-01, PNorm = 66.3015, GNorm = 1.3004, lr_0 = 3.8537e-04
Loss = 5.2473e-01, PNorm = 66.3432, GNorm = 1.3812, lr_0 = 3.8912e-04
Loss = 4.4045e-01, PNorm = 66.3894, GNorm = 1.3699, lr_0 = 3.9287e-04
Loss = 5.3137e-01, PNorm = 66.4297, GNorm = 1.7372, lr_0 = 3.9663e-04
Loss = 5.6951e-01, PNorm = 66.4769, GNorm = 1.2498, lr_0 = 4.0038e-04
Loss = 5.2596e-01, PNorm = 66.5225, GNorm = 1.6716, lr_0 = 4.0413e-04
Loss = 4.4601e-01, PNorm = 66.5673, GNorm = 1.2553, lr_0 = 4.0787e-04
Loss = 5.6455e-01, PNorm = 66.6078, GNorm = 1.9213, lr_0 = 4.1162e-04
Loss = 4.1657e-01, PNorm = 66.6496, GNorm = 1.3590, lr_0 = 4.1537e-04
Loss = 4.5193e-01, PNorm = 66.6940, GNorm = 1.4584, lr_0 = 4.1913e-04
Loss = 4.5028e-01, PNorm = 66.7374, GNorm = 1.1652, lr_0 = 4.2288e-04
Loss = 4.9460e-01, PNorm = 66.7798, GNorm = 1.0857, lr_0 = 4.2662e-04
Loss = 6.2638e-01, PNorm = 66.8267, GNorm = 2.1526, lr_0 = 4.3037e-04
Loss = 5.0515e-01, PNorm = 66.8737, GNorm = 1.3258, lr_0 = 4.3412e-04
Loss = 6.4634e-01, PNorm = 66.9279, GNorm = 1.5252, lr_0 = 4.3788e-04
Loss = 4.8994e-01, PNorm = 66.9765, GNorm = 1.5834, lr_0 = 4.4163e-04
Loss = 4.9251e-01, PNorm = 67.0184, GNorm = 1.3541, lr_0 = 4.4538e-04
Loss = 4.8115e-01, PNorm = 67.0641, GNorm = 1.1098, lr_0 = 4.4912e-04
Loss = 5.8529e-01, PNorm = 67.1140, GNorm = 1.4898, lr_0 = 4.5287e-04
Loss = 5.7642e-01, PNorm = 67.1640, GNorm = 1.3266, lr_0 = 4.5662e-04
Loss = 4.8874e-01, PNorm = 67.2197, GNorm = 1.6634, lr_0 = 4.6038e-04
Loss = 5.1826e-01, PNorm = 67.2714, GNorm = 1.3202, lr_0 = 4.6413e-04
Loss = 5.1628e-01, PNorm = 67.3230, GNorm = 1.1115, lr_0 = 4.6787e-04
Loss = 5.9066e-01, PNorm = 67.3835, GNorm = 1.4519, lr_0 = 4.7162e-04
Loss = 5.0943e-01, PNorm = 67.4385, GNorm = 1.2257, lr_0 = 4.7537e-04
Loss = 5.1363e-01, PNorm = 67.4925, GNorm = 1.0669, lr_0 = 4.7913e-04
Loss = 4.4019e-01, PNorm = 67.5476, GNorm = 1.2810, lr_0 = 4.8288e-04
Loss = 4.8684e-01, PNorm = 67.6034, GNorm = 1.1221, lr_0 = 4.8663e-04
Loss = 5.6322e-01, PNorm = 67.6577, GNorm = 1.1344, lr_0 = 4.9038e-04
Loss = 4.5170e-01, PNorm = 67.7169, GNorm = 1.0043, lr_0 = 4.9412e-04
Loss = 5.4925e-01, PNorm = 67.7752, GNorm = 1.3041, lr_0 = 4.9788e-04
Loss = 5.0597e-01, PNorm = 67.8359, GNorm = 1.2917, lr_0 = 5.0163e-04
Loss = 5.0259e-01, PNorm = 67.8949, GNorm = 1.2976, lr_0 = 5.0538e-04
Loss = 5.0913e-01, PNorm = 67.9575, GNorm = 1.4659, lr_0 = 5.0913e-04
Loss = 4.3758e-01, PNorm = 68.0142, GNorm = 1.3629, lr_0 = 5.1287e-04
Loss = 4.6979e-01, PNorm = 68.0692, GNorm = 1.1682, lr_0 = 5.1663e-04
Loss = 5.0394e-01, PNorm = 68.1256, GNorm = 1.2850, lr_0 = 5.2038e-04
Loss = 5.1070e-01, PNorm = 68.1827, GNorm = 1.0523, lr_0 = 5.2413e-04
Loss = 4.6235e-01, PNorm = 68.2382, GNorm = 0.9615, lr_0 = 5.2788e-04
Loss = 4.8885e-01, PNorm = 68.3037, GNorm = 1.7613, lr_0 = 5.3162e-04
Loss = 4.9643e-01, PNorm = 68.3626, GNorm = 1.3697, lr_0 = 5.3538e-04
Loss = 4.9807e-01, PNorm = 68.4315, GNorm = 1.1288, lr_0 = 5.3912e-04
Loss = 4.9065e-01, PNorm = 68.4976, GNorm = 1.0501, lr_0 = 5.4288e-04
Loss = 4.7009e-01, PNorm = 68.5587, GNorm = 1.4733, lr_0 = 5.4663e-04
Loss = 4.7179e-01, PNorm = 68.6207, GNorm = 1.1109, lr_0 = 5.5038e-04
Validation mae = 0.130359
Epoch 1
Loss = 3.4874e-01, PNorm = 68.6819, GNorm = 0.7526, lr_0 = 5.5413e-04
Loss = 4.3433e-01, PNorm = 68.7604, GNorm = 1.1600, lr_0 = 5.5787e-04
Loss = 3.5127e-01, PNorm = 68.8313, GNorm = 1.0317, lr_0 = 5.6163e-04
Loss = 3.5119e-01, PNorm = 68.9073, GNorm = 1.3828, lr_0 = 5.6538e-04
Loss = 4.7897e-01, PNorm = 68.9871, GNorm = 1.5171, lr_0 = 5.6913e-04
Loss = 5.0534e-01, PNorm = 69.0838, GNorm = 1.1837, lr_0 = 5.7288e-04
Loss = 3.7657e-01, PNorm = 69.1887, GNorm = 1.0509, lr_0 = 5.7662e-04
Loss = 4.3092e-01, PNorm = 69.2897, GNorm = 1.1581, lr_0 = 5.8038e-04
Loss = 3.3539e-01, PNorm = 69.3806, GNorm = 1.2261, lr_0 = 5.8413e-04
Loss = 4.0279e-01, PNorm = 69.4680, GNorm = 1.0434, lr_0 = 5.8788e-04
Loss = 4.2020e-01, PNorm = 69.5628, GNorm = 1.1851, lr_0 = 5.9163e-04
Loss = 4.3587e-01, PNorm = 69.6590, GNorm = 1.6373, lr_0 = 5.9538e-04
Loss = 3.6570e-01, PNorm = 69.7615, GNorm = 1.4368, lr_0 = 5.9913e-04
Loss = 3.6713e-01, PNorm = 69.8555, GNorm = 1.0637, lr_0 = 6.0288e-04
Loss = 4.0078e-01, PNorm = 69.9464, GNorm = 1.4572, lr_0 = 6.0663e-04
Loss = 4.0856e-01, PNorm = 70.0477, GNorm = 1.1969, lr_0 = 6.1038e-04
Loss = 3.5071e-01, PNorm = 70.1555, GNorm = 1.4860, lr_0 = 6.1413e-04
Loss = 3.9875e-01, PNorm = 70.2571, GNorm = 1.6273, lr_0 = 6.1788e-04
Loss = 3.8930e-01, PNorm = 70.3654, GNorm = 1.4210, lr_0 = 6.2163e-04
Loss = 3.5822e-01, PNorm = 70.4722, GNorm = 1.3460, lr_0 = 6.2538e-04
Loss = 4.1665e-01, PNorm = 70.5748, GNorm = 1.3449, lr_0 = 6.2913e-04
Loss = 4.2826e-01, PNorm = 70.6916, GNorm = 1.3455, lr_0 = 6.3288e-04
Loss = 4.5707e-01, PNorm = 70.8105, GNorm = 1.3973, lr_0 = 6.3663e-04
Loss = 3.6624e-01, PNorm = 70.9336, GNorm = 1.1258, lr_0 = 6.4038e-04
Loss = 3.7656e-01, PNorm = 71.0555, GNorm = 1.1682, lr_0 = 6.4413e-04
Loss = 4.4659e-01, PNorm = 71.1759, GNorm = 1.6317, lr_0 = 6.4788e-04
Loss = 3.7090e-01, PNorm = 71.3000, GNorm = 1.2858, lr_0 = 6.5163e-04
Loss = 3.6182e-01, PNorm = 71.4115, GNorm = 1.2139, lr_0 = 6.5538e-04
Loss = 3.9105e-01, PNorm = 71.5261, GNorm = 1.3000, lr_0 = 6.5913e-04
Loss = 4.1633e-01, PNorm = 71.6341, GNorm = 1.4309, lr_0 = 6.6288e-04
Loss = 3.7732e-01, PNorm = 71.7415, GNorm = 1.3038, lr_0 = 6.6663e-04
Loss = 4.4315e-01, PNorm = 71.8554, GNorm = 1.0581, lr_0 = 6.7038e-04
Loss = 4.2558e-01, PNorm = 71.9814, GNorm = 1.1535, lr_0 = 6.7413e-04
Loss = 4.6938e-01, PNorm = 72.0910, GNorm = 1.3425, lr_0 = 6.7788e-04
Loss = 3.9580e-01, PNorm = 72.2077, GNorm = 1.1537, lr_0 = 6.8163e-04
Loss = 4.5388e-01, PNorm = 72.3368, GNorm = 1.7084, lr_0 = 6.8538e-04
Loss = 4.2326e-01, PNorm = 72.4536, GNorm = 0.9308, lr_0 = 6.8913e-04
Loss = 4.5302e-01, PNorm = 72.5957, GNorm = 1.8162, lr_0 = 6.9288e-04
Loss = 4.1810e-01, PNorm = 72.7249, GNorm = 1.1239, lr_0 = 6.9663e-04
Loss = 4.5456e-01, PNorm = 72.8745, GNorm = 1.5708, lr_0 = 7.0038e-04
Loss = 4.2837e-01, PNorm = 73.0140, GNorm = 1.3738, lr_0 = 7.0413e-04
Loss = 4.2133e-01, PNorm = 73.1453, GNorm = 1.3908, lr_0 = 7.0788e-04
Loss = 4.0921e-01, PNorm = 73.2653, GNorm = 1.3570, lr_0 = 7.1163e-04
Loss = 4.2255e-01, PNorm = 73.3905, GNorm = 1.1446, lr_0 = 7.1538e-04
Loss = 4.3168e-01, PNorm = 73.5091, GNorm = 1.2715, lr_0 = 7.1913e-04
Loss = 3.4355e-01, PNorm = 73.6291, GNorm = 0.9198, lr_0 = 7.2288e-04
Loss = 3.4305e-01, PNorm = 73.7339, GNorm = 1.1626, lr_0 = 7.2663e-04
Loss = 4.0869e-01, PNorm = 73.8356, GNorm = 1.3518, lr_0 = 7.3038e-04
Loss = 3.9363e-01, PNorm = 73.9507, GNorm = 1.5677, lr_0 = 7.3413e-04
Loss = 3.6676e-01, PNorm = 74.0550, GNorm = 1.2617, lr_0 = 7.3788e-04
Loss = 4.2452e-01, PNorm = 74.1800, GNorm = 1.6516, lr_0 = 7.4163e-04
Loss = 4.2682e-01, PNorm = 74.3057, GNorm = 2.0220, lr_0 = 7.4538e-04
Loss = 3.7215e-01, PNorm = 74.4345, GNorm = 1.1841, lr_0 = 7.4913e-04
Loss = 3.9125e-01, PNorm = 74.5572, GNorm = 1.0284, lr_0 = 7.5288e-04
Loss = 3.8247e-01, PNorm = 74.6792, GNorm = 1.1573, lr_0 = 7.5663e-04
Loss = 3.7629e-01, PNorm = 74.7971, GNorm = 1.3094, lr_0 = 7.6038e-04
Loss = 4.5520e-01, PNorm = 74.9225, GNorm = 1.1517, lr_0 = 7.6413e-04
Loss = 4.5262e-01, PNorm = 75.0736, GNorm = 1.4796, lr_0 = 7.6788e-04
Loss = 4.2893e-01, PNorm = 75.2214, GNorm = 1.7080, lr_0 = 7.7163e-04
Loss = 3.7110e-01, PNorm = 75.3634, GNorm = 1.1488, lr_0 = 7.7538e-04
Loss = 4.3320e-01, PNorm = 75.4888, GNorm = 0.9581, lr_0 = 7.7913e-04
Loss = 4.3063e-01, PNorm = 75.6292, GNorm = 0.9734, lr_0 = 7.8288e-04
Loss = 4.6369e-01, PNorm = 75.7478, GNorm = 1.0617, lr_0 = 7.8663e-04
Loss = 4.1248e-01, PNorm = 75.8853, GNorm = 0.9633, lr_0 = 7.9038e-04
Loss = 3.6617e-01, PNorm = 76.0200, GNorm = 1.1837, lr_0 = 7.9413e-04
Loss = 4.0329e-01, PNorm = 76.1502, GNorm = 0.8856, lr_0 = 7.9788e-04
Loss = 5.1396e-01, PNorm = 76.3001, GNorm = 1.4617, lr_0 = 8.0163e-04
Loss = 4.4092e-01, PNorm = 76.4526, GNorm = 1.9111, lr_0 = 8.0538e-04
Loss = 4.3911e-01, PNorm = 76.6068, GNorm = 1.2236, lr_0 = 8.0913e-04
Loss = 4.2829e-01, PNorm = 76.7620, GNorm = 0.9413, lr_0 = 8.1288e-04
Loss = 3.9556e-01, PNorm = 76.9118, GNorm = 1.7315, lr_0 = 8.1663e-04
Loss = 4.2222e-01, PNorm = 77.0400, GNorm = 1.2448, lr_0 = 8.2038e-04
Loss = 3.9639e-01, PNorm = 77.1874, GNorm = 1.5622, lr_0 = 8.2413e-04
Loss = 4.2341e-01, PNorm = 77.3260, GNorm = 1.6221, lr_0 = 8.2788e-04
Loss = 4.3935e-01, PNorm = 77.4618, GNorm = 1.2554, lr_0 = 8.3163e-04
Loss = 4.6489e-01, PNorm = 77.6118, GNorm = 1.3655, lr_0 = 8.3538e-04
Loss = 3.9628e-01, PNorm = 77.7413, GNorm = 1.6253, lr_0 = 8.3913e-04
Loss = 4.7275e-01, PNorm = 77.8839, GNorm = 1.9482, lr_0 = 8.4288e-04
Loss = 4.2355e-01, PNorm = 78.0225, GNorm = 0.9057, lr_0 = 8.4663e-04
Loss = 3.6512e-01, PNorm = 78.1583, GNorm = 1.0452, lr_0 = 8.5038e-04
Loss = 4.5711e-01, PNorm = 78.3041, GNorm = 1.4037, lr_0 = 8.5413e-04
Loss = 4.2570e-01, PNorm = 78.4300, GNorm = 1.1225, lr_0 = 8.5788e-04
Loss = 4.1887e-01, PNorm = 78.5772, GNorm = 1.0043, lr_0 = 8.6163e-04
Loss = 4.6298e-01, PNorm = 78.7157, GNorm = 0.9274, lr_0 = 8.6538e-04
Loss = 3.9704e-01, PNorm = 78.8653, GNorm = 0.8887, lr_0 = 8.6913e-04
Loss = 4.5410e-01, PNorm = 78.9924, GNorm = 1.0816, lr_0 = 8.7288e-04
Loss = 4.3838e-01, PNorm = 79.1453, GNorm = 1.4563, lr_0 = 8.7663e-04
Loss = 4.1094e-01, PNorm = 79.2824, GNorm = 1.0536, lr_0 = 8.8038e-04
Loss = 5.6117e-01, PNorm = 79.4439, GNorm = 0.8100, lr_0 = 8.8413e-04
Loss = 4.2558e-01, PNorm = 79.6064, GNorm = 1.8507, lr_0 = 8.8788e-04
Loss = 4.4256e-01, PNorm = 79.7729, GNorm = 1.0916, lr_0 = 8.9163e-04
Loss = 4.9868e-01, PNorm = 79.9168, GNorm = 0.9691, lr_0 = 8.9538e-04
Loss = 4.5093e-01, PNorm = 80.0853, GNorm = 1.7311, lr_0 = 8.9913e-04
Loss = 4.3653e-01, PNorm = 80.2404, GNorm = 1.2245, lr_0 = 9.0288e-04
Loss = 3.9061e-01, PNorm = 80.3943, GNorm = 1.6231, lr_0 = 9.0663e-04
Loss = 4.4429e-01, PNorm = 80.5531, GNorm = 1.4742, lr_0 = 9.1038e-04
Loss = 4.4051e-01, PNorm = 80.7073, GNorm = 1.0121, lr_0 = 9.1413e-04
Loss = 4.0495e-01, PNorm = 80.8690, GNorm = 1.3882, lr_0 = 9.1788e-04
Loss = 4.6018e-01, PNorm = 81.0112, GNorm = 1.2931, lr_0 = 9.2163e-04
Loss = 4.4719e-01, PNorm = 81.1697, GNorm = 1.8128, lr_0 = 9.2538e-04
Loss = 4.1796e-01, PNorm = 81.3221, GNorm = 1.0543, lr_0 = 9.2913e-04
Loss = 4.2787e-01, PNorm = 81.4788, GNorm = 1.1314, lr_0 = 9.3288e-04
Loss = 5.2291e-01, PNorm = 81.6266, GNorm = 1.1559, lr_0 = 9.3663e-04
Loss = 3.9053e-01, PNorm = 81.7907, GNorm = 0.9101, lr_0 = 9.4038e-04
Loss = 4.8941e-01, PNorm = 81.9454, GNorm = 1.8653, lr_0 = 9.4413e-04
Loss = 3.9289e-01, PNorm = 82.1033, GNorm = 1.2252, lr_0 = 9.4788e-04
Loss = 4.1718e-01, PNorm = 82.2762, GNorm = 0.9033, lr_0 = 9.5163e-04
Loss = 3.8361e-01, PNorm = 82.4504, GNorm = 1.0555, lr_0 = 9.5538e-04
Loss = 4.3738e-01, PNorm = 82.6164, GNorm = 1.8313, lr_0 = 9.5913e-04
Loss = 4.1288e-01, PNorm = 82.7777, GNorm = 0.8945, lr_0 = 9.6288e-04
Loss = 4.6003e-01, PNorm = 82.9536, GNorm = 1.5923, lr_0 = 9.6663e-04
Loss = 4.3369e-01, PNorm = 83.1068, GNorm = 1.3032, lr_0 = 9.7038e-04
Loss = 3.6820e-01, PNorm = 83.2544, GNorm = 1.1429, lr_0 = 9.7413e-04
Loss = 3.9258e-01, PNorm = 83.4058, GNorm = 1.3876, lr_0 = 9.7788e-04
Loss = 3.9691e-01, PNorm = 83.5502, GNorm = 1.0374, lr_0 = 9.8163e-04
Loss = 4.8286e-01, PNorm = 83.6975, GNorm = 1.1221, lr_0 = 9.8537e-04
Loss = 4.2398e-01, PNorm = 83.8599, GNorm = 2.0136, lr_0 = 9.8912e-04
Loss = 4.1563e-01, PNorm = 84.0287, GNorm = 0.9385, lr_0 = 9.9288e-04
Loss = 4.2905e-01, PNorm = 84.1937, GNorm = 0.7067, lr_0 = 9.9663e-04
Loss = 4.4860e-01, PNorm = 84.3614, GNorm = 1.2620, lr_0 = 9.9993e-04
Validation mae = 0.127493
Epoch 2
Loss = 3.1763e-01, PNorm = 84.5269, GNorm = 1.6231, lr_0 = 9.9925e-04
Loss = 2.9195e-01, PNorm = 84.7000, GNorm = 0.9042, lr_0 = 9.9856e-04
Loss = 2.5870e-01, PNorm = 84.8388, GNorm = 0.8260, lr_0 = 9.9788e-04
Loss = 2.8764e-01, PNorm = 84.9876, GNorm = 0.9049, lr_0 = 9.9719e-04
Loss = 2.9713e-01, PNorm = 85.1279, GNorm = 0.9696, lr_0 = 9.9651e-04
Loss = 2.7657e-01, PNorm = 85.2906, GNorm = 1.5844, lr_0 = 9.9583e-04
Loss = 3.3820e-01, PNorm = 85.4638, GNorm = 1.4319, lr_0 = 9.9515e-04
Loss = 2.5293e-01, PNorm = 85.6315, GNorm = 0.9161, lr_0 = 9.9446e-04
Loss = 2.7027e-01, PNorm = 85.8062, GNorm = 1.1195, lr_0 = 9.9378e-04
Loss = 3.1039e-01, PNorm = 85.9630, GNorm = 0.7935, lr_0 = 9.9310e-04
Loss = 2.5162e-01, PNorm = 86.1360, GNorm = 0.8315, lr_0 = 9.9242e-04
Loss = 2.8753e-01, PNorm = 86.3163, GNorm = 1.2194, lr_0 = 9.9174e-04
Loss = 2.7729e-01, PNorm = 86.4838, GNorm = 1.0995, lr_0 = 9.9106e-04
Loss = 2.5670e-01, PNorm = 86.6518, GNorm = 1.0268, lr_0 = 9.9038e-04
Loss = 2.7247e-01, PNorm = 86.8153, GNorm = 0.9582, lr_0 = 9.8971e-04
Loss = 3.0400e-01, PNorm = 86.9740, GNorm = 0.8686, lr_0 = 9.8903e-04
Loss = 2.9659e-01, PNorm = 87.1562, GNorm = 0.9529, lr_0 = 9.8835e-04
Loss = 3.5230e-01, PNorm = 87.3457, GNorm = 0.7520, lr_0 = 9.8767e-04
Loss = 3.2218e-01, PNorm = 87.5404, GNorm = 1.4704, lr_0 = 9.8700e-04
Loss = 3.0155e-01, PNorm = 87.7079, GNorm = 0.9105, lr_0 = 9.8632e-04
Loss = 2.2048e-01, PNorm = 87.8752, GNorm = 1.0823, lr_0 = 9.8564e-04
Loss = 3.8860e-01, PNorm = 88.0521, GNorm = 1.6028, lr_0 = 9.8497e-04
Loss = 3.1327e-01, PNorm = 88.2393, GNorm = 0.9959, lr_0 = 9.8429e-04
Loss = 3.0945e-01, PNorm = 88.4361, GNorm = 0.9476, lr_0 = 9.8362e-04
Loss = 2.7713e-01, PNorm = 88.6032, GNorm = 0.9110, lr_0 = 9.8295e-04
Loss = 2.5337e-01, PNorm = 88.7657, GNorm = 0.8458, lr_0 = 9.8227e-04
Loss = 2.9453e-01, PNorm = 88.9265, GNorm = 1.1940, lr_0 = 9.8160e-04
Loss = 3.2164e-01, PNorm = 89.0953, GNorm = 1.1470, lr_0 = 9.8093e-04
Loss = 3.1305e-01, PNorm = 89.2775, GNorm = 1.5358, lr_0 = 9.8026e-04
Loss = 3.1419e-01, PNorm = 89.4513, GNorm = 1.1196, lr_0 = 9.7958e-04
Loss = 2.9514e-01, PNorm = 89.6334, GNorm = 0.9767, lr_0 = 9.7891e-04
Loss = 2.7812e-01, PNorm = 89.7974, GNorm = 0.9827, lr_0 = 9.7824e-04
Loss = 3.4575e-01, PNorm = 89.9698, GNorm = 1.2918, lr_0 = 9.7757e-04
Loss = 2.8811e-01, PNorm = 90.1534, GNorm = 0.8851, lr_0 = 9.7690e-04
Loss = 3.0512e-01, PNorm = 90.3160, GNorm = 0.7052, lr_0 = 9.7623e-04
Loss = 3.2809e-01, PNorm = 90.5032, GNorm = 0.9398, lr_0 = 9.7556e-04
Loss = 3.1129e-01, PNorm = 90.6511, GNorm = 1.0387, lr_0 = 9.7490e-04
Loss = 2.9559e-01, PNorm = 90.8202, GNorm = 0.9544, lr_0 = 9.7423e-04
Loss = 2.9512e-01, PNorm = 90.9818, GNorm = 1.0990, lr_0 = 9.7356e-04
Loss = 3.3981e-01, PNorm = 91.1540, GNorm = 1.1075, lr_0 = 9.7289e-04
Loss = 2.9285e-01, PNorm = 91.3312, GNorm = 0.8081, lr_0 = 9.7223e-04
Loss = 4.0658e-01, PNorm = 91.5033, GNorm = 1.6325, lr_0 = 9.7156e-04
Loss = 3.2272e-01, PNorm = 91.6723, GNorm = 1.0002, lr_0 = 9.7090e-04
Loss = 3.1957e-01, PNorm = 91.8500, GNorm = 1.1664, lr_0 = 9.7023e-04
Loss = 3.3020e-01, PNorm = 92.0220, GNorm = 0.9797, lr_0 = 9.6957e-04
Loss = 2.9421e-01, PNorm = 92.1908, GNorm = 1.2160, lr_0 = 9.6890e-04
Loss = 3.0466e-01, PNorm = 92.3744, GNorm = 1.8486, lr_0 = 9.6824e-04
Loss = 2.8675e-01, PNorm = 92.5425, GNorm = 1.4029, lr_0 = 9.6757e-04
Loss = 3.1741e-01, PNorm = 92.7110, GNorm = 1.1879, lr_0 = 9.6691e-04
Loss = 3.2605e-01, PNorm = 92.8915, GNorm = 0.8083, lr_0 = 9.6625e-04
Loss = 3.1197e-01, PNorm = 93.0713, GNorm = 0.9676, lr_0 = 9.6559e-04
Loss = 3.5401e-01, PNorm = 93.2604, GNorm = 0.8901, lr_0 = 9.6493e-04
Loss = 3.5920e-01, PNorm = 93.4355, GNorm = 1.0087, lr_0 = 9.6427e-04
Loss = 2.8832e-01, PNorm = 93.6268, GNorm = 0.8186, lr_0 = 9.6360e-04
Loss = 3.4467e-01, PNorm = 93.8016, GNorm = 1.0099, lr_0 = 9.6294e-04
Loss = 2.8596e-01, PNorm = 93.9669, GNorm = 0.9785, lr_0 = 9.6228e-04
Loss = 3.7459e-01, PNorm = 94.1337, GNorm = 1.7984, lr_0 = 9.6163e-04
Loss = 3.6697e-01, PNorm = 94.3230, GNorm = 1.1243, lr_0 = 9.6097e-04
Loss = 2.9160e-01, PNorm = 94.5060, GNorm = 0.7800, lr_0 = 9.6031e-04
Loss = 3.5272e-01, PNorm = 94.6694, GNorm = 1.0014, lr_0 = 9.5965e-04
Loss = 2.6994e-01, PNorm = 94.8302, GNorm = 0.7325, lr_0 = 9.5899e-04
Loss = 3.2436e-01, PNorm = 94.9848, GNorm = 1.5174, lr_0 = 9.5834e-04
Loss = 3.1302e-01, PNorm = 95.1325, GNorm = 1.0564, lr_0 = 9.5768e-04
Loss = 3.0242e-01, PNorm = 95.2966, GNorm = 1.2746, lr_0 = 9.5702e-04
Loss = 2.6735e-01, PNorm = 95.4577, GNorm = 1.3001, lr_0 = 9.5637e-04
Loss = 3.1859e-01, PNorm = 95.6208, GNorm = 1.1761, lr_0 = 9.5571e-04
Loss = 2.9353e-01, PNorm = 95.7776, GNorm = 0.8152, lr_0 = 9.5506e-04
Loss = 3.2770e-01, PNorm = 95.9270, GNorm = 1.0560, lr_0 = 9.5440e-04
Loss = 3.2269e-01, PNorm = 96.0848, GNorm = 2.0489, lr_0 = 9.5375e-04
Loss = 2.8384e-01, PNorm = 96.2354, GNorm = 1.0355, lr_0 = 9.5310e-04
Loss = 3.1007e-01, PNorm = 96.3812, GNorm = 1.0206, lr_0 = 9.5244e-04
Loss = 3.1295e-01, PNorm = 96.5396, GNorm = 1.2361, lr_0 = 9.5179e-04
Loss = 3.2421e-01, PNorm = 96.6920, GNorm = 1.0610, lr_0 = 9.5114e-04
Loss = 3.2695e-01, PNorm = 96.8798, GNorm = 1.7302, lr_0 = 9.5049e-04
Loss = 3.2762e-01, PNorm = 97.0567, GNorm = 0.8078, lr_0 = 9.4984e-04
Loss = 3.3013e-01, PNorm = 97.2222, GNorm = 1.1984, lr_0 = 9.4919e-04
Loss = 3.7839e-01, PNorm = 97.4096, GNorm = 1.6212, lr_0 = 9.4854e-04
Loss = 3.2215e-01, PNorm = 97.5834, GNorm = 0.7675, lr_0 = 9.4789e-04
Loss = 3.2246e-01, PNorm = 97.7424, GNorm = 0.8338, lr_0 = 9.4724e-04
Loss = 3.6147e-01, PNorm = 97.8814, GNorm = 1.0111, lr_0 = 9.4659e-04
Loss = 2.9052e-01, PNorm = 98.0225, GNorm = 0.7638, lr_0 = 9.4594e-04
Loss = 2.5879e-01, PNorm = 98.1583, GNorm = 0.8593, lr_0 = 9.4529e-04
Loss = 3.7113e-01, PNorm = 98.2937, GNorm = 1.3135, lr_0 = 9.4464e-04
Loss = 3.3864e-01, PNorm = 98.4522, GNorm = 0.8414, lr_0 = 9.4400e-04
Loss = 3.2572e-01, PNorm = 98.6062, GNorm = 1.1233, lr_0 = 9.4335e-04
Loss = 3.1963e-01, PNorm = 98.7661, GNorm = 1.4815, lr_0 = 9.4270e-04
Loss = 2.9129e-01, PNorm = 98.9293, GNorm = 1.5006, lr_0 = 9.4206e-04
Loss = 3.0558e-01, PNorm = 99.0776, GNorm = 0.9840, lr_0 = 9.4141e-04
Loss = 2.8853e-01, PNorm = 99.2356, GNorm = 1.0381, lr_0 = 9.4077e-04
Loss = 3.3304e-01, PNorm = 99.3810, GNorm = 1.3169, lr_0 = 9.4012e-04
Loss = 3.2164e-01, PNorm = 99.5268, GNorm = 1.0390, lr_0 = 9.3948e-04
Loss = 3.0397e-01, PNorm = 99.6788, GNorm = 0.9411, lr_0 = 9.3884e-04
Loss = 3.3150e-01, PNorm = 99.8198, GNorm = 0.8441, lr_0 = 9.3819e-04
Loss = 3.4455e-01, PNorm = 99.9790, GNorm = 1.7030, lr_0 = 9.3755e-04
Loss = 3.9228e-01, PNorm = 100.1465, GNorm = 1.2869, lr_0 = 9.3691e-04
Loss = 3.1632e-01, PNorm = 100.3021, GNorm = 0.8905, lr_0 = 9.3627e-04
Loss = 2.9354e-01, PNorm = 100.4601, GNorm = 0.7979, lr_0 = 9.3562e-04
Loss = 3.1767e-01, PNorm = 100.6006, GNorm = 1.0275, lr_0 = 9.3498e-04
Loss = 2.9284e-01, PNorm = 100.7442, GNorm = 0.9012, lr_0 = 9.3434e-04
Loss = 3.2536e-01, PNorm = 100.8740, GNorm = 1.0856, lr_0 = 9.3370e-04
Loss = 3.0929e-01, PNorm = 101.0079, GNorm = 0.8866, lr_0 = 9.3306e-04
Loss = 3.2876e-01, PNorm = 101.1376, GNorm = 1.4648, lr_0 = 9.3242e-04
Loss = 2.7344e-01, PNorm = 101.2721, GNorm = 0.7505, lr_0 = 9.3178e-04
Loss = 3.5025e-01, PNorm = 101.3855, GNorm = 1.0833, lr_0 = 9.3115e-04
Loss = 3.2729e-01, PNorm = 101.5029, GNorm = 0.8383, lr_0 = 9.3051e-04
Loss = 3.3957e-01, PNorm = 101.6296, GNorm = 0.8856, lr_0 = 9.2987e-04
Loss = 3.4061e-01, PNorm = 101.7567, GNorm = 1.3525, lr_0 = 9.2923e-04
Loss = 2.9397e-01, PNorm = 101.8944, GNorm = 1.0029, lr_0 = 9.2860e-04
Loss = 3.0377e-01, PNorm = 102.0192, GNorm = 1.1377, lr_0 = 9.2796e-04
Loss = 3.4777e-01, PNorm = 102.1489, GNorm = 1.5952, lr_0 = 9.2733e-04
Loss = 2.9529e-01, PNorm = 102.2850, GNorm = 0.8256, lr_0 = 9.2669e-04
Loss = 3.4536e-01, PNorm = 102.4107, GNorm = 1.2205, lr_0 = 9.2606e-04
Loss = 3.5253e-01, PNorm = 102.5513, GNorm = 1.3115, lr_0 = 9.2542e-04
Loss = 3.1495e-01, PNorm = 102.6952, GNorm = 0.8383, lr_0 = 9.2479e-04
Loss = 3.2849e-01, PNorm = 102.8331, GNorm = 0.8159, lr_0 = 9.2415e-04
Loss = 3.3359e-01, PNorm = 102.9768, GNorm = 1.0801, lr_0 = 9.2352e-04
Loss = 2.8766e-01, PNorm = 103.0948, GNorm = 0.8725, lr_0 = 9.2289e-04
Loss = 3.6289e-01, PNorm = 103.2159, GNorm = 1.3283, lr_0 = 9.2226e-04
Loss = 3.9221e-01, PNorm = 103.3662, GNorm = 1.5678, lr_0 = 9.2162e-04
Loss = 3.2928e-01, PNorm = 103.5196, GNorm = 1.3194, lr_0 = 9.2099e-04
Validation mae = 0.125927
Epoch 3
Loss = 1.9288e-01, PNorm = 103.6607, GNorm = 0.8245, lr_0 = 9.2036e-04
Loss = 1.7946e-01, PNorm = 103.7850, GNorm = 0.8440, lr_0 = 9.1973e-04
Loss = 1.9949e-01, PNorm = 103.8900, GNorm = 0.9952, lr_0 = 9.1910e-04
Loss = 1.8023e-01, PNorm = 103.9977, GNorm = 1.0444, lr_0 = 9.1847e-04
Loss = 1.7766e-01, PNorm = 104.0943, GNorm = 0.7081, lr_0 = 9.1784e-04
Loss = 1.7386e-01, PNorm = 104.1917, GNorm = 1.1958, lr_0 = 9.1721e-04
Loss = 1.8501e-01, PNorm = 104.2843, GNorm = 1.0349, lr_0 = 9.1658e-04
Loss = 1.7654e-01, PNorm = 104.3909, GNorm = 1.2344, lr_0 = 9.1596e-04
Loss = 2.0562e-01, PNorm = 104.4796, GNorm = 0.8159, lr_0 = 9.1533e-04
Loss = 1.5823e-01, PNorm = 104.5842, GNorm = 1.9523, lr_0 = 9.1470e-04
Loss = 1.9111e-01, PNorm = 104.6817, GNorm = 0.8739, lr_0 = 9.1408e-04
Loss = 1.7527e-01, PNorm = 104.7778, GNorm = 0.6551, lr_0 = 9.1345e-04
Loss = 1.7749e-01, PNorm = 104.8804, GNorm = 0.8612, lr_0 = 9.1282e-04
Loss = 1.5920e-01, PNorm = 104.9759, GNorm = 0.8314, lr_0 = 9.1220e-04
Loss = 1.9428e-01, PNorm = 105.0786, GNorm = 0.5594, lr_0 = 9.1157e-04
Loss = 1.8925e-01, PNorm = 105.1976, GNorm = 1.3843, lr_0 = 9.1095e-04
Loss = 2.0775e-01, PNorm = 105.3060, GNorm = 0.8495, lr_0 = 9.1032e-04
Loss = 1.9169e-01, PNorm = 105.4369, GNorm = 1.1285, lr_0 = 9.0970e-04
Loss = 1.6851e-01, PNorm = 105.5373, GNorm = 1.0836, lr_0 = 9.0908e-04
Loss = 1.9063e-01, PNorm = 105.6499, GNorm = 1.1979, lr_0 = 9.0846e-04
Loss = 1.9004e-01, PNorm = 105.7639, GNorm = 0.5099, lr_0 = 9.0783e-04
Loss = 2.0149e-01, PNorm = 105.8774, GNorm = 0.9402, lr_0 = 9.0721e-04
Loss = 1.8691e-01, PNorm = 105.9988, GNorm = 0.8548, lr_0 = 9.0659e-04
Loss = 1.8153e-01, PNorm = 106.1206, GNorm = 0.9384, lr_0 = 9.0597e-04
Loss = 1.6401e-01, PNorm = 106.2400, GNorm = 0.7751, lr_0 = 9.0535e-04
Loss = 2.0606e-01, PNorm = 106.3617, GNorm = 0.7227, lr_0 = 9.0473e-04
Loss = 1.6557e-01, PNorm = 106.4638, GNorm = 0.9492, lr_0 = 9.0411e-04
Loss = 1.7936e-01, PNorm = 106.5656, GNorm = 0.8527, lr_0 = 9.0349e-04
Loss = 2.0177e-01, PNorm = 106.6720, GNorm = 0.8553, lr_0 = 9.0287e-04
Loss = 2.0445e-01, PNorm = 106.7789, GNorm = 1.0916, lr_0 = 9.0225e-04
Loss = 1.6867e-01, PNorm = 106.8961, GNorm = 0.8547, lr_0 = 9.0163e-04
Loss = 1.7839e-01, PNorm = 107.0144, GNorm = 0.8568, lr_0 = 9.0102e-04
Loss = 1.8816e-01, PNorm = 107.1302, GNorm = 0.8064, lr_0 = 9.0040e-04
Loss = 1.8240e-01, PNorm = 107.2358, GNorm = 1.0998, lr_0 = 8.9978e-04
Loss = 1.7967e-01, PNorm = 107.3494, GNorm = 0.6424, lr_0 = 8.9916e-04
Loss = 1.9766e-01, PNorm = 107.4671, GNorm = 0.8374, lr_0 = 8.9855e-04
Loss = 1.7138e-01, PNorm = 107.5922, GNorm = 0.6994, lr_0 = 8.9793e-04
Loss = 1.9153e-01, PNorm = 107.7029, GNorm = 0.8588, lr_0 = 8.9732e-04
Loss = 1.6411e-01, PNorm = 107.8251, GNorm = 0.9377, lr_0 = 8.9670e-04
Loss = 2.1655e-01, PNorm = 107.9345, GNorm = 2.5898, lr_0 = 8.9609e-04
Loss = 1.9947e-01, PNorm = 108.0459, GNorm = 1.1827, lr_0 = 8.9548e-04
Loss = 1.7036e-01, PNorm = 108.1579, GNorm = 0.7774, lr_0 = 8.9486e-04
Loss = 1.7948e-01, PNorm = 108.2652, GNorm = 0.8682, lr_0 = 8.9425e-04
Loss = 1.8050e-01, PNorm = 108.3858, GNorm = 1.0623, lr_0 = 8.9364e-04
Loss = 1.7846e-01, PNorm = 108.4989, GNorm = 1.0721, lr_0 = 8.9302e-04
Loss = 2.2828e-01, PNorm = 108.6175, GNorm = 0.9241, lr_0 = 8.9241e-04
Loss = 2.0476e-01, PNorm = 108.7437, GNorm = 1.0371, lr_0 = 8.9180e-04
Loss = 2.2289e-01, PNorm = 108.8697, GNorm = 1.2579, lr_0 = 8.9119e-04
Loss = 1.9711e-01, PNorm = 108.9965, GNorm = 0.6592, lr_0 = 8.9058e-04
Loss = 2.1130e-01, PNorm = 109.1247, GNorm = 0.7712, lr_0 = 8.8997e-04
Loss = 2.4791e-01, PNorm = 109.2595, GNorm = 1.4397, lr_0 = 8.8936e-04
Loss = 2.1739e-01, PNorm = 109.3943, GNorm = 1.3089, lr_0 = 8.8875e-04
Loss = 2.1674e-01, PNorm = 109.5249, GNorm = 0.8711, lr_0 = 8.8814e-04
Loss = 1.9584e-01, PNorm = 109.6675, GNorm = 0.6665, lr_0 = 8.8753e-04
Loss = 1.9988e-01, PNorm = 109.7949, GNorm = 1.2103, lr_0 = 8.8693e-04
Loss = 2.1377e-01, PNorm = 109.9141, GNorm = 0.9941, lr_0 = 8.8632e-04
Loss = 2.0683e-01, PNorm = 110.0453, GNorm = 0.9180, lr_0 = 8.8571e-04
Loss = 2.2051e-01, PNorm = 110.1696, GNorm = 0.7952, lr_0 = 8.8510e-04
Loss = 1.8103e-01, PNorm = 110.2948, GNorm = 1.0248, lr_0 = 8.8450e-04
Loss = 2.2648e-01, PNorm = 110.4147, GNorm = 0.8323, lr_0 = 8.8389e-04
Loss = 2.0139e-01, PNorm = 110.5355, GNorm = 1.2287, lr_0 = 8.8329e-04
Loss = 2.6894e-01, PNorm = 110.6662, GNorm = 0.9035, lr_0 = 8.8268e-04
Loss = 2.2143e-01, PNorm = 110.8005, GNorm = 0.9345, lr_0 = 8.8208e-04
Loss = 2.0879e-01, PNorm = 110.9445, GNorm = 0.7858, lr_0 = 8.8147e-04
Loss = 1.8577e-01, PNorm = 111.0829, GNorm = 1.0993, lr_0 = 8.8087e-04
Loss = 2.4476e-01, PNorm = 111.2163, GNorm = 0.9788, lr_0 = 8.8026e-04
Loss = 1.9405e-01, PNorm = 111.3589, GNorm = 0.8180, lr_0 = 8.7966e-04
Loss = 1.7739e-01, PNorm = 111.4756, GNorm = 1.0117, lr_0 = 8.7906e-04
Loss = 2.1930e-01, PNorm = 111.6069, GNorm = 1.2430, lr_0 = 8.7846e-04
Loss = 2.3072e-01, PNorm = 111.7289, GNorm = 1.0229, lr_0 = 8.7785e-04
Loss = 2.2971e-01, PNorm = 111.8682, GNorm = 0.7755, lr_0 = 8.7725e-04
Loss = 2.5066e-01, PNorm = 112.0131, GNorm = 1.6521, lr_0 = 8.7665e-04
Loss = 1.9142e-01, PNorm = 112.1520, GNorm = 0.7932, lr_0 = 8.7605e-04
Loss = 1.9839e-01, PNorm = 112.2731, GNorm = 1.2669, lr_0 = 8.7545e-04
Loss = 1.8372e-01, PNorm = 112.3950, GNorm = 0.7101, lr_0 = 8.7485e-04
Loss = 2.5447e-01, PNorm = 112.5102, GNorm = 0.9543, lr_0 = 8.7425e-04
Loss = 2.4572e-01, PNorm = 112.6263, GNorm = 0.9510, lr_0 = 8.7365e-04
Loss = 2.3881e-01, PNorm = 112.7582, GNorm = 1.1032, lr_0 = 8.7306e-04
Loss = 1.9850e-01, PNorm = 112.8854, GNorm = 1.1166, lr_0 = 8.7246e-04
Loss = 1.7443e-01, PNorm = 113.0107, GNorm = 0.8336, lr_0 = 8.7186e-04
Loss = 1.9285e-01, PNorm = 113.1190, GNorm = 0.8018, lr_0 = 8.7126e-04
Loss = 2.0649e-01, PNorm = 113.2389, GNorm = 1.2265, lr_0 = 8.7067e-04
Loss = 2.0801e-01, PNorm = 113.3605, GNorm = 0.9380, lr_0 = 8.7007e-04
Loss = 1.7886e-01, PNorm = 113.4797, GNorm = 1.2534, lr_0 = 8.6947e-04
Loss = 2.1955e-01, PNorm = 113.6005, GNorm = 0.8037, lr_0 = 8.6888e-04
Loss = 1.7010e-01, PNorm = 113.7174, GNorm = 0.5970, lr_0 = 8.6828e-04
Loss = 2.3829e-01, PNorm = 113.8307, GNorm = 1.3886, lr_0 = 8.6769e-04
Loss = 1.9666e-01, PNorm = 113.9559, GNorm = 0.8063, lr_0 = 8.6709e-04
Loss = 2.1891e-01, PNorm = 114.0750, GNorm = 0.9081, lr_0 = 8.6650e-04
Loss = 2.2054e-01, PNorm = 114.1915, GNorm = 1.3549, lr_0 = 8.6590e-04
Loss = 2.3461e-01, PNorm = 114.3070, GNorm = 1.3793, lr_0 = 8.6531e-04
Loss = 2.3163e-01, PNorm = 114.4404, GNorm = 0.8776, lr_0 = 8.6472e-04
Loss = 2.1219e-01, PNorm = 114.5658, GNorm = 0.7390, lr_0 = 8.6413e-04
Loss = 2.3812e-01, PNorm = 114.6898, GNorm = 0.9076, lr_0 = 8.6353e-04
Loss = 1.9835e-01, PNorm = 114.8245, GNorm = 1.4091, lr_0 = 8.6294e-04
Loss = 2.2657e-01, PNorm = 114.9525, GNorm = 1.2070, lr_0 = 8.6235e-04
Loss = 2.0746e-01, PNorm = 115.0813, GNorm = 1.2325, lr_0 = 8.6176e-04
Loss = 1.9516e-01, PNorm = 115.2049, GNorm = 0.8005, lr_0 = 8.6117e-04
Loss = 1.9431e-01, PNorm = 115.3291, GNorm = 1.3192, lr_0 = 8.6058e-04
Loss = 2.0587e-01, PNorm = 115.4502, GNorm = 1.1193, lr_0 = 8.5999e-04
Loss = 1.9656e-01, PNorm = 115.5666, GNorm = 0.9910, lr_0 = 8.5940e-04
Loss = 1.8795e-01, PNorm = 115.6754, GNorm = 1.1800, lr_0 = 8.5881e-04
Loss = 1.9031e-01, PNorm = 115.7842, GNorm = 0.8964, lr_0 = 8.5823e-04
Loss = 1.9484e-01, PNorm = 115.8972, GNorm = 0.8099, lr_0 = 8.5764e-04
Loss = 2.0536e-01, PNorm = 116.0089, GNorm = 0.8480, lr_0 = 8.5705e-04
Loss = 2.4670e-01, PNorm = 116.1263, GNorm = 0.8536, lr_0 = 8.5646e-04
Loss = 2.5025e-01, PNorm = 116.2586, GNorm = 0.7631, lr_0 = 8.5588e-04
Loss = 2.1063e-01, PNorm = 116.3929, GNorm = 1.0498, lr_0 = 8.5529e-04
Loss = 2.2350e-01, PNorm = 116.5217, GNorm = 0.7209, lr_0 = 8.5470e-04
Loss = 2.2272e-01, PNorm = 116.6491, GNorm = 1.2094, lr_0 = 8.5412e-04
Loss = 1.9772e-01, PNorm = 116.7699, GNorm = 0.8257, lr_0 = 8.5353e-04
Loss = 2.4857e-01, PNorm = 116.8932, GNorm = 0.7636, lr_0 = 8.5295e-04
Loss = 2.3760e-01, PNorm = 117.0043, GNorm = 1.0966, lr_0 = 8.5236e-04
Loss = 2.5662e-01, PNorm = 117.1373, GNorm = 2.8480, lr_0 = 8.5178e-04
Loss = 2.4051e-01, PNorm = 117.2548, GNorm = 0.7417, lr_0 = 8.5120e-04
Loss = 2.1865e-01, PNorm = 117.3808, GNorm = 0.6723, lr_0 = 8.5061e-04
Loss = 2.3947e-01, PNorm = 117.4999, GNorm = 1.3756, lr_0 = 8.5003e-04
Loss = 2.4175e-01, PNorm = 117.6369, GNorm = 1.4545, lr_0 = 8.4945e-04
Loss = 2.0350e-01, PNorm = 117.7639, GNorm = 1.1555, lr_0 = 8.4887e-04
Loss = 2.1102e-01, PNorm = 117.8878, GNorm = 0.7602, lr_0 = 8.4828e-04
Validation mae = 0.124971
Epoch 4
Loss = 1.1910e-01, PNorm = 118.0045, GNorm = 0.6405, lr_0 = 8.4770e-04
Loss = 1.5207e-01, PNorm = 118.0942, GNorm = 1.1830, lr_0 = 8.4712e-04
Loss = 1.3521e-01, PNorm = 118.1922, GNorm = 1.0588, lr_0 = 8.4654e-04
Loss = 1.1571e-01, PNorm = 118.2774, GNorm = 0.6760, lr_0 = 8.4596e-04
Loss = 1.3003e-01, PNorm = 118.3593, GNorm = 0.7311, lr_0 = 8.4538e-04
Loss = 1.2262e-01, PNorm = 118.4344, GNorm = 0.8713, lr_0 = 8.4480e-04
Loss = 1.1505e-01, PNorm = 118.5186, GNorm = 0.6018, lr_0 = 8.4423e-04
Loss = 1.2601e-01, PNorm = 118.5996, GNorm = 0.6318, lr_0 = 8.4365e-04
Loss = 1.2450e-01, PNorm = 118.6754, GNorm = 0.5131, lr_0 = 8.4307e-04
Loss = 1.1240e-01, PNorm = 118.7466, GNorm = 0.7044, lr_0 = 8.4249e-04
Loss = 1.1572e-01, PNorm = 118.8300, GNorm = 0.5977, lr_0 = 8.4191e-04
Loss = 1.0970e-01, PNorm = 118.9093, GNorm = 0.5887, lr_0 = 8.4134e-04
Loss = 1.1597e-01, PNorm = 118.9798, GNorm = 0.5492, lr_0 = 8.4076e-04
Loss = 1.3429e-01, PNorm = 119.0598, GNorm = 0.6636, lr_0 = 8.4019e-04
Loss = 1.1320e-01, PNorm = 119.1415, GNorm = 0.9002, lr_0 = 8.3961e-04
Loss = 1.0436e-01, PNorm = 119.2275, GNorm = 0.7162, lr_0 = 8.3903e-04
Loss = 1.1125e-01, PNorm = 119.3041, GNorm = 0.7905, lr_0 = 8.3846e-04
Loss = 1.1263e-01, PNorm = 119.3865, GNorm = 1.1364, lr_0 = 8.3789e-04
Loss = 1.1785e-01, PNorm = 119.4649, GNorm = 0.7738, lr_0 = 8.3731e-04
Loss = 1.2721e-01, PNorm = 119.5509, GNorm = 0.7223, lr_0 = 8.3674e-04
Loss = 1.1449e-01, PNorm = 119.6379, GNorm = 0.7673, lr_0 = 8.3616e-04
Loss = 1.2567e-01, PNorm = 119.7194, GNorm = 0.7145, lr_0 = 8.3559e-04
Loss = 1.0987e-01, PNorm = 119.7913, GNorm = 0.6717, lr_0 = 8.3502e-04
Loss = 1.2188e-01, PNorm = 119.8634, GNorm = 0.8435, lr_0 = 8.3445e-04
Loss = 1.1487e-01, PNorm = 119.9512, GNorm = 1.1553, lr_0 = 8.3388e-04
Loss = 1.2126e-01, PNorm = 120.0303, GNorm = 0.7030, lr_0 = 8.3330e-04
Loss = 1.1222e-01, PNorm = 120.1037, GNorm = 0.6989, lr_0 = 8.3273e-04
Loss = 8.8640e-02, PNorm = 120.1756, GNorm = 0.4945, lr_0 = 8.3216e-04
Loss = 1.1076e-01, PNorm = 120.2471, GNorm = 0.5878, lr_0 = 8.3159e-04
Loss = 9.9304e-02, PNorm = 120.3197, GNorm = 0.7039, lr_0 = 8.3102e-04
Loss = 1.3274e-01, PNorm = 120.3918, GNorm = 0.6572, lr_0 = 8.3045e-04
Loss = 1.2801e-01, PNorm = 120.4731, GNorm = 0.6197, lr_0 = 8.2988e-04
Loss = 1.1186e-01, PNorm = 120.5538, GNorm = 0.6355, lr_0 = 8.2932e-04
Loss = 1.3133e-01, PNorm = 120.6369, GNorm = 1.0101, lr_0 = 8.2875e-04
Loss = 1.1157e-01, PNorm = 120.7230, GNorm = 0.7809, lr_0 = 8.2818e-04
Loss = 1.0502e-01, PNorm = 120.8028, GNorm = 1.0235, lr_0 = 8.2761e-04
Loss = 1.2970e-01, PNorm = 120.8834, GNorm = 0.6229, lr_0 = 8.2705e-04
Loss = 1.1853e-01, PNorm = 120.9665, GNorm = 0.8503, lr_0 = 8.2648e-04
Loss = 1.1366e-01, PNorm = 121.0543, GNorm = 0.7790, lr_0 = 8.2591e-04
Loss = 1.1750e-01, PNorm = 121.1400, GNorm = 0.5922, lr_0 = 8.2535e-04
Loss = 1.1510e-01, PNorm = 121.2185, GNorm = 1.0394, lr_0 = 8.2478e-04
Loss = 1.0963e-01, PNorm = 121.3005, GNorm = 1.1435, lr_0 = 8.2422e-04
Loss = 1.3666e-01, PNorm = 121.3890, GNorm = 0.9100, lr_0 = 8.2365e-04
Loss = 1.4755e-01, PNorm = 121.4826, GNorm = 0.9325, lr_0 = 8.2309e-04
Loss = 1.4053e-01, PNorm = 121.5872, GNorm = 1.0188, lr_0 = 8.2252e-04
Loss = 1.3799e-01, PNorm = 121.6758, GNorm = 0.8988, lr_0 = 8.2196e-04
Loss = 1.3257e-01, PNorm = 121.7671, GNorm = 0.8931, lr_0 = 8.2140e-04
Loss = 1.3556e-01, PNorm = 121.8578, GNorm = 0.5466, lr_0 = 8.2084e-04
Loss = 1.3673e-01, PNorm = 121.9631, GNorm = 0.8712, lr_0 = 8.2027e-04
Loss = 1.2283e-01, PNorm = 122.0528, GNorm = 1.2249, lr_0 = 8.1971e-04
Loss = 1.2940e-01, PNorm = 122.1430, GNorm = 1.3845, lr_0 = 8.1915e-04
Loss = 1.3520e-01, PNorm = 122.2339, GNorm = 0.9598, lr_0 = 8.1859e-04
Loss = 1.2282e-01, PNorm = 122.3158, GNorm = 0.5246, lr_0 = 8.1803e-04
Loss = 1.7825e-01, PNorm = 122.4098, GNorm = 1.1596, lr_0 = 8.1747e-04
Loss = 1.2738e-01, PNorm = 122.5008, GNorm = 1.0179, lr_0 = 8.1691e-04
Loss = 1.1178e-01, PNorm = 122.5873, GNorm = 0.6773, lr_0 = 8.1635e-04
Loss = 1.3971e-01, PNorm = 122.6682, GNorm = 0.4320, lr_0 = 8.1579e-04
Loss = 1.4181e-01, PNorm = 122.7563, GNorm = 0.9304, lr_0 = 8.1523e-04
Loss = 1.1649e-01, PNorm = 122.8451, GNorm = 0.6290, lr_0 = 8.1467e-04
Loss = 1.6537e-01, PNorm = 122.9346, GNorm = 0.8521, lr_0 = 8.1411e-04
Loss = 1.2340e-01, PNorm = 123.0263, GNorm = 0.6479, lr_0 = 8.1355e-04
Loss = 1.2463e-01, PNorm = 123.1177, GNorm = 0.9623, lr_0 = 8.1300e-04
Loss = 1.1847e-01, PNorm = 123.2060, GNorm = 0.6605, lr_0 = 8.1244e-04
Loss = 1.3017e-01, PNorm = 123.2970, GNorm = 0.6605, lr_0 = 8.1188e-04
Loss = 1.2278e-01, PNorm = 123.3972, GNorm = 0.7729, lr_0 = 8.1133e-04
Loss = 1.4171e-01, PNorm = 123.4907, GNorm = 0.8660, lr_0 = 8.1077e-04
Loss = 1.1487e-01, PNorm = 123.5925, GNorm = 1.1638, lr_0 = 8.1022e-04
Loss = 1.4746e-01, PNorm = 123.6900, GNorm = 1.2849, lr_0 = 8.0966e-04
Loss = 1.3047e-01, PNorm = 123.7907, GNorm = 0.9872, lr_0 = 8.0911e-04
Loss = 1.3352e-01, PNorm = 123.8850, GNorm = 0.6432, lr_0 = 8.0855e-04
Loss = 1.3447e-01, PNorm = 123.9863, GNorm = 0.5108, lr_0 = 8.0800e-04
Loss = 1.3806e-01, PNorm = 124.0884, GNorm = 0.5607, lr_0 = 8.0745e-04
Loss = 1.3406e-01, PNorm = 124.1909, GNorm = 0.7824, lr_0 = 8.0689e-04
Loss = 1.7464e-01, PNorm = 124.2912, GNorm = 0.5449, lr_0 = 8.0634e-04
Loss = 1.5298e-01, PNorm = 124.4068, GNorm = 1.0434, lr_0 = 8.0579e-04
Loss = 1.3382e-01, PNorm = 124.5151, GNorm = 0.6901, lr_0 = 8.0523e-04
Loss = 1.3929e-01, PNorm = 124.6133, GNorm = 1.3506, lr_0 = 8.0468e-04
Loss = 1.4791e-01, PNorm = 124.7226, GNorm = 0.8704, lr_0 = 8.0413e-04
Loss = 1.3856e-01, PNorm = 124.8145, GNorm = 0.9060, lr_0 = 8.0358e-04
Loss = 1.3753e-01, PNorm = 124.9129, GNorm = 0.7256, lr_0 = 8.0303e-04
Loss = 1.2553e-01, PNorm = 125.0080, GNorm = 0.8229, lr_0 = 8.0248e-04
Loss = 1.3879e-01, PNorm = 125.0910, GNorm = 4.0916, lr_0 = 8.0193e-04
Loss = 1.3419e-01, PNorm = 125.1671, GNorm = 0.9176, lr_0 = 8.0138e-04
Loss = 1.5418e-01, PNorm = 125.2703, GNorm = 1.0081, lr_0 = 8.0083e-04
Loss = 1.6519e-01, PNorm = 125.3766, GNorm = 0.8588, lr_0 = 8.0028e-04
Loss = 1.5778e-01, PNorm = 125.4848, GNorm = 0.5794, lr_0 = 7.9974e-04
Loss = 1.3279e-01, PNorm = 125.5931, GNorm = 0.7935, lr_0 = 7.9919e-04
Loss = 1.2567e-01, PNorm = 125.6981, GNorm = 0.8936, lr_0 = 7.9864e-04
Loss = 1.4316e-01, PNorm = 125.8017, GNorm = 0.9846, lr_0 = 7.9809e-04
Loss = 1.2578e-01, PNorm = 125.9058, GNorm = 0.6170, lr_0 = 7.9755e-04
Loss = 1.3973e-01, PNorm = 126.0122, GNorm = 0.8701, lr_0 = 7.9700e-04
Loss = 1.5919e-01, PNorm = 126.1228, GNorm = 1.2320, lr_0 = 7.9645e-04
Loss = 1.3469e-01, PNorm = 126.2303, GNorm = 0.6103, lr_0 = 7.9591e-04
Loss = 1.5082e-01, PNorm = 126.3434, GNorm = 0.8063, lr_0 = 7.9536e-04
Loss = 1.3834e-01, PNorm = 126.4518, GNorm = 0.8873, lr_0 = 7.9482e-04
Loss = 1.2573e-01, PNorm = 126.5549, GNorm = 0.8740, lr_0 = 7.9427e-04
Loss = 1.4057e-01, PNorm = 126.6556, GNorm = 1.5810, lr_0 = 7.9373e-04
Loss = 1.5003e-01, PNorm = 126.7404, GNorm = 0.9079, lr_0 = 7.9319e-04
Loss = 1.6815e-01, PNorm = 126.8451, GNorm = 1.0823, lr_0 = 7.9264e-04
Loss = 1.5112e-01, PNorm = 126.9505, GNorm = 0.6484, lr_0 = 7.9210e-04
Loss = 1.6452e-01, PNorm = 127.0616, GNorm = 0.5820, lr_0 = 7.9156e-04
Loss = 1.2486e-01, PNorm = 127.1614, GNorm = 1.0897, lr_0 = 7.9101e-04
Loss = 1.5507e-01, PNorm = 127.2679, GNorm = 0.6130, lr_0 = 7.9047e-04
Loss = 1.3417e-01, PNorm = 127.3651, GNorm = 0.6885, lr_0 = 7.8993e-04
Loss = 1.5519e-01, PNorm = 127.4611, GNorm = 0.8705, lr_0 = 7.8939e-04
Loss = 1.4721e-01, PNorm = 127.5606, GNorm = 0.8588, lr_0 = 7.8885e-04
Loss = 1.4474e-01, PNorm = 127.6560, GNorm = 0.5546, lr_0 = 7.8831e-04
Loss = 1.3259e-01, PNorm = 127.7564, GNorm = 0.5672, lr_0 = 7.8777e-04
Loss = 1.3838e-01, PNorm = 127.8530, GNorm = 1.3351, lr_0 = 7.8723e-04
Loss = 1.4361e-01, PNorm = 127.9529, GNorm = 0.7636, lr_0 = 7.8669e-04
Loss = 1.3553e-01, PNorm = 128.0664, GNorm = 0.8158, lr_0 = 7.8615e-04
Loss = 1.7901e-01, PNorm = 128.1810, GNorm = 0.7256, lr_0 = 7.8561e-04
Loss = 1.4424e-01, PNorm = 128.2816, GNorm = 1.3874, lr_0 = 7.8507e-04
Loss = 1.8213e-01, PNorm = 128.3990, GNorm = 1.1227, lr_0 = 7.8454e-04
Loss = 1.8631e-01, PNorm = 128.5205, GNorm = 0.7743, lr_0 = 7.8400e-04
Loss = 1.4887e-01, PNorm = 128.6561, GNorm = 0.7738, lr_0 = 7.8346e-04
Loss = 1.6329e-01, PNorm = 128.7754, GNorm = 0.6464, lr_0 = 7.8293e-04
Loss = 1.4839e-01, PNorm = 128.9008, GNorm = 0.7874, lr_0 = 7.8239e-04
Loss = 1.4284e-01, PNorm = 129.0098, GNorm = 1.3541, lr_0 = 7.8185e-04
Loss = 1.5571e-01, PNorm = 129.1130, GNorm = 1.0624, lr_0 = 7.8132e-04
Validation mae = 0.124136
Epoch 5
Loss = 9.4913e-02, PNorm = 129.2023, GNorm = 0.7313, lr_0 = 7.8078e-04
Loss = 8.9639e-02, PNorm = 129.2778, GNorm = 0.6565, lr_0 = 7.8025e-04
Loss = 9.2333e-02, PNorm = 129.3440, GNorm = 0.7020, lr_0 = 7.7971e-04
Loss = 9.6492e-02, PNorm = 129.4068, GNorm = 0.9686, lr_0 = 7.7918e-04
Loss = 8.2449e-02, PNorm = 129.4634, GNorm = 0.6091, lr_0 = 7.7864e-04
Loss = 9.0364e-02, PNorm = 129.5238, GNorm = 0.6558, lr_0 = 7.7811e-04
Loss = 8.5921e-02, PNorm = 129.5896, GNorm = 0.7534, lr_0 = 7.7758e-04
Loss = 7.0777e-02, PNorm = 129.6504, GNorm = 0.6659, lr_0 = 7.7705e-04
Loss = 8.2201e-02, PNorm = 129.7139, GNorm = 0.6872, lr_0 = 7.7651e-04
Loss = 6.3440e-02, PNorm = 129.7692, GNorm = 0.4663, lr_0 = 7.7598e-04
Loss = 8.1445e-02, PNorm = 129.8249, GNorm = 0.7155, lr_0 = 7.7545e-04
Loss = 8.5968e-02, PNorm = 129.8817, GNorm = 0.5150, lr_0 = 7.7492e-04
Loss = 8.4450e-02, PNorm = 129.9386, GNorm = 0.3555, lr_0 = 7.7439e-04
Loss = 7.5348e-02, PNorm = 129.9965, GNorm = 0.3950, lr_0 = 7.7386e-04
Loss = 7.4929e-02, PNorm = 130.0499, GNorm = 0.4888, lr_0 = 7.7333e-04
Loss = 8.0785e-02, PNorm = 130.1077, GNorm = 0.5928, lr_0 = 7.7280e-04
Loss = 8.2051e-02, PNorm = 130.1698, GNorm = 0.6591, lr_0 = 7.7227e-04
Loss = 7.9285e-02, PNorm = 130.2276, GNorm = 0.5106, lr_0 = 7.7174e-04
Loss = 7.7981e-02, PNorm = 130.2801, GNorm = 0.5979, lr_0 = 7.7121e-04
Loss = 7.3586e-02, PNorm = 130.3394, GNorm = 0.6455, lr_0 = 7.7068e-04
Loss = 8.7941e-02, PNorm = 130.4094, GNorm = 0.6043, lr_0 = 7.7015e-04
Loss = 8.5427e-02, PNorm = 130.4753, GNorm = 0.8542, lr_0 = 7.6963e-04
Loss = 8.5267e-02, PNorm = 130.5376, GNorm = 0.8622, lr_0 = 7.6910e-04
Loss = 6.6461e-02, PNorm = 130.6044, GNorm = 0.3970, lr_0 = 7.6857e-04
Loss = 7.7599e-02, PNorm = 130.6628, GNorm = 0.7336, lr_0 = 7.6805e-04
Loss = 9.7547e-02, PNorm = 130.7260, GNorm = 0.4109, lr_0 = 7.6752e-04
Loss = 6.9408e-02, PNorm = 130.7944, GNorm = 0.5871, lr_0 = 7.6699e-04
Loss = 7.8596e-02, PNorm = 130.8589, GNorm = 0.6034, lr_0 = 7.6647e-04
Loss = 8.2458e-02, PNorm = 130.9314, GNorm = 0.4923, lr_0 = 7.6594e-04
Loss = 8.8375e-02, PNorm = 130.9973, GNorm = 0.9064, lr_0 = 7.6542e-04
Loss = 8.3585e-02, PNorm = 131.0630, GNorm = 0.4628, lr_0 = 7.6489e-04
Loss = 7.5613e-02, PNorm = 131.1367, GNorm = 0.5181, lr_0 = 7.6437e-04
Loss = 8.4762e-02, PNorm = 131.2021, GNorm = 0.4953, lr_0 = 7.6385e-04
Loss = 8.7657e-02, PNorm = 131.2698, GNorm = 0.6671, lr_0 = 7.6332e-04
Loss = 7.1808e-02, PNorm = 131.3378, GNorm = 0.7174, lr_0 = 7.6280e-04
Loss = 9.0220e-02, PNorm = 131.3979, GNorm = 0.5548, lr_0 = 7.6228e-04
Loss = 7.5363e-02, PNorm = 131.4632, GNorm = 0.6001, lr_0 = 7.6176e-04
Loss = 1.0264e-01, PNorm = 131.5367, GNorm = 0.6968, lr_0 = 7.6123e-04
Loss = 7.3146e-02, PNorm = 131.6079, GNorm = 0.6526, lr_0 = 7.6071e-04
Loss = 1.1384e-01, PNorm = 131.6786, GNorm = 0.5218, lr_0 = 7.6019e-04
Loss = 8.0265e-02, PNorm = 131.7531, GNorm = 0.7129, lr_0 = 7.5967e-04
Loss = 8.4163e-02, PNorm = 131.8277, GNorm = 0.9995, lr_0 = 7.5915e-04
Loss = 1.0172e-01, PNorm = 131.8973, GNorm = 0.8285, lr_0 = 7.5863e-04
Loss = 9.5302e-02, PNorm = 131.9745, GNorm = 0.9744, lr_0 = 7.5811e-04
Loss = 8.6916e-02, PNorm = 132.0506, GNorm = 0.6219, lr_0 = 7.5759e-04
Loss = 9.7159e-02, PNorm = 132.1311, GNorm = 0.9670, lr_0 = 7.5707e-04
Loss = 7.2532e-02, PNorm = 132.1989, GNorm = 0.6831, lr_0 = 7.5655e-04
Loss = 9.4248e-02, PNorm = 132.2838, GNorm = 1.3343, lr_0 = 7.5603e-04
Loss = 8.5725e-02, PNorm = 132.3621, GNorm = 0.6960, lr_0 = 7.5552e-04
Loss = 1.3016e-01, PNorm = 132.4397, GNorm = 0.8328, lr_0 = 7.5500e-04
Loss = 1.0312e-01, PNorm = 132.5238, GNorm = 0.6275, lr_0 = 7.5448e-04
Loss = 7.4786e-02, PNorm = 132.6013, GNorm = 0.6302, lr_0 = 7.5397e-04
Loss = 8.5519e-02, PNorm = 132.6728, GNorm = 0.7058, lr_0 = 7.5345e-04
Loss = 8.8513e-02, PNorm = 132.7434, GNorm = 0.6749, lr_0 = 7.5293e-04
Loss = 7.7681e-02, PNorm = 132.8169, GNorm = 0.6659, lr_0 = 7.5242e-04
Loss = 7.5817e-02, PNorm = 132.8917, GNorm = 0.5776, lr_0 = 7.5190e-04
Loss = 1.0881e-01, PNorm = 132.9654, GNorm = 0.5344, lr_0 = 7.5139e-04
Loss = 7.8182e-02, PNorm = 133.0339, GNorm = 1.1338, lr_0 = 7.5087e-04
Loss = 9.2429e-02, PNorm = 133.1070, GNorm = 0.5676, lr_0 = 7.5036e-04
Loss = 8.4983e-02, PNorm = 133.1785, GNorm = 0.4515, lr_0 = 7.4984e-04
Loss = 8.5766e-02, PNorm = 133.2477, GNorm = 0.6254, lr_0 = 7.4933e-04
Loss = 8.4507e-02, PNorm = 133.3203, GNorm = 0.8581, lr_0 = 7.4882e-04
Loss = 8.1430e-02, PNorm = 133.3914, GNorm = 0.8479, lr_0 = 7.4830e-04
Loss = 7.9100e-02, PNorm = 133.4627, GNorm = 0.8358, lr_0 = 7.4779e-04
Loss = 9.5689e-02, PNorm = 133.5332, GNorm = 0.7411, lr_0 = 7.4728e-04
Loss = 9.0940e-02, PNorm = 133.6115, GNorm = 0.6964, lr_0 = 7.4677e-04
Loss = 1.1926e-01, PNorm = 133.6898, GNorm = 0.6309, lr_0 = 7.4625e-04
Loss = 1.0270e-01, PNorm = 133.7657, GNorm = 0.7451, lr_0 = 7.4574e-04
Loss = 1.0355e-01, PNorm = 133.8447, GNorm = 0.4766, lr_0 = 7.4523e-04
Loss = 8.1302e-02, PNorm = 133.9237, GNorm = 0.7951, lr_0 = 7.4472e-04
Loss = 8.3453e-02, PNorm = 133.9981, GNorm = 0.4922, lr_0 = 7.4421e-04
Loss = 8.3482e-02, PNorm = 134.0782, GNorm = 0.9411, lr_0 = 7.4370e-04
Loss = 7.2131e-02, PNorm = 134.1506, GNorm = 0.5013, lr_0 = 7.4319e-04
Loss = 1.0896e-01, PNorm = 134.2267, GNorm = 0.8271, lr_0 = 7.4268e-04
Loss = 9.0181e-02, PNorm = 134.2997, GNorm = 0.4872, lr_0 = 7.4217e-04
Loss = 8.3263e-02, PNorm = 134.3787, GNorm = 0.9399, lr_0 = 7.4167e-04
Loss = 9.5362e-02, PNorm = 134.4544, GNorm = 0.5468, lr_0 = 7.4116e-04
Loss = 8.5283e-02, PNorm = 134.5315, GNorm = 0.4831, lr_0 = 7.4065e-04
Loss = 1.0007e-01, PNorm = 134.6121, GNorm = 0.6024, lr_0 = 7.4014e-04
Loss = 9.9253e-02, PNorm = 134.6989, GNorm = 1.2962, lr_0 = 7.3964e-04
Loss = 1.1262e-01, PNorm = 134.7811, GNorm = 1.0230, lr_0 = 7.3913e-04
Loss = 8.9634e-02, PNorm = 134.8630, GNorm = 0.5390, lr_0 = 7.3862e-04
Loss = 8.1919e-02, PNorm = 134.9345, GNorm = 0.7622, lr_0 = 7.3812e-04
Loss = 8.9085e-02, PNorm = 135.0012, GNorm = 0.6193, lr_0 = 7.3761e-04
Loss = 1.1389e-01, PNorm = 135.0789, GNorm = 0.5184, lr_0 = 7.3711e-04
Loss = 8.8415e-02, PNorm = 135.1472, GNorm = 0.9751, lr_0 = 7.3660e-04
Loss = 7.8820e-02, PNorm = 135.2279, GNorm = 0.8491, lr_0 = 7.3610e-04
Loss = 8.6100e-02, PNorm = 135.3023, GNorm = 0.6782, lr_0 = 7.3559e-04
Loss = 9.0181e-02, PNorm = 135.3839, GNorm = 0.4752, lr_0 = 7.3509e-04
Loss = 9.6844e-02, PNorm = 135.4652, GNorm = 0.6039, lr_0 = 7.3458e-04
Loss = 1.0222e-01, PNorm = 135.5504, GNorm = 0.6494, lr_0 = 7.3408e-04
Loss = 8.8147e-02, PNorm = 135.6318, GNorm = 1.2581, lr_0 = 7.3358e-04
Loss = 9.9018e-02, PNorm = 135.7113, GNorm = 0.5474, lr_0 = 7.3308e-04
Loss = 9.9242e-02, PNorm = 135.7990, GNorm = 0.6417, lr_0 = 7.3257e-04
Loss = 9.7793e-02, PNorm = 135.8865, GNorm = 0.9613, lr_0 = 7.3207e-04
Loss = 8.8694e-02, PNorm = 135.9702, GNorm = 0.5452, lr_0 = 7.3157e-04
Loss = 8.9297e-02, PNorm = 136.0497, GNorm = 0.5787, lr_0 = 7.3107e-04
Loss = 9.1757e-02, PNorm = 136.1254, GNorm = 0.5987, lr_0 = 7.3057e-04
Loss = 9.2507e-02, PNorm = 136.2025, GNorm = 0.4688, lr_0 = 7.3007e-04
Loss = 8.2134e-02, PNorm = 136.2827, GNorm = 0.5765, lr_0 = 7.2957e-04
Loss = 1.0342e-01, PNorm = 136.3586, GNorm = 0.6515, lr_0 = 7.2907e-04
Loss = 1.0438e-01, PNorm = 136.4481, GNorm = 0.9165, lr_0 = 7.2857e-04
Loss = 9.3343e-02, PNorm = 136.5310, GNorm = 0.5799, lr_0 = 7.2807e-04
Loss = 9.2350e-02, PNorm = 136.6268, GNorm = 0.8524, lr_0 = 7.2757e-04
Loss = 9.4643e-02, PNorm = 136.7144, GNorm = 0.7745, lr_0 = 7.2707e-04
Loss = 7.2941e-02, PNorm = 136.8007, GNorm = 0.5495, lr_0 = 7.2657e-04
Loss = 9.4909e-02, PNorm = 136.8756, GNorm = 0.5871, lr_0 = 7.2608e-04
Loss = 8.0765e-02, PNorm = 136.9592, GNorm = 0.3509, lr_0 = 7.2558e-04
Loss = 1.0245e-01, PNorm = 137.0387, GNorm = 0.6652, lr_0 = 7.2508e-04
Loss = 8.8635e-02, PNorm = 137.1210, GNorm = 0.5684, lr_0 = 7.2458e-04
Loss = 1.0419e-01, PNorm = 137.2056, GNorm = 0.6842, lr_0 = 7.2409e-04
Loss = 8.2553e-02, PNorm = 137.2928, GNorm = 0.4221, lr_0 = 7.2359e-04
Loss = 8.7625e-02, PNorm = 137.3678, GNorm = 0.6469, lr_0 = 7.2310e-04
Loss = 1.0632e-01, PNorm = 137.4448, GNorm = 0.6976, lr_0 = 7.2260e-04
Loss = 8.9491e-02, PNorm = 137.5252, GNorm = 0.4258, lr_0 = 7.2211e-04
Loss = 1.0328e-01, PNorm = 137.6076, GNorm = 0.8915, lr_0 = 7.2161e-04
Loss = 9.5844e-02, PNorm = 137.6894, GNorm = 0.5498, lr_0 = 7.2112e-04
Loss = 1.3010e-01, PNorm = 137.7816, GNorm = 0.8861, lr_0 = 7.2062e-04
Loss = 8.4701e-02, PNorm = 137.8693, GNorm = 0.7406, lr_0 = 7.2013e-04
Loss = 1.1986e-01, PNorm = 137.9550, GNorm = 0.6427, lr_0 = 7.1964e-04
Validation mae = 0.123947
Epoch 6
Loss = 6.5750e-02, PNorm = 138.0357, GNorm = 0.3973, lr_0 = 7.1914e-04
Loss = 6.6371e-02, PNorm = 138.1043, GNorm = 0.5542, lr_0 = 7.1865e-04
Loss = 5.9195e-02, PNorm = 138.1645, GNorm = 0.5464, lr_0 = 7.1816e-04
Loss = 8.0303e-02, PNorm = 138.2250, GNorm = 0.6310, lr_0 = 7.1767e-04
Loss = 6.2079e-02, PNorm = 138.2769, GNorm = 0.5068, lr_0 = 7.1717e-04
Loss = 7.3976e-02, PNorm = 138.3253, GNorm = 0.5971, lr_0 = 7.1668e-04
Loss = 6.0006e-02, PNorm = 138.3732, GNorm = 0.5819, lr_0 = 7.1619e-04
Loss = 7.0467e-02, PNorm = 138.4267, GNorm = 0.6734, lr_0 = 7.1570e-04
Loss = 6.4471e-02, PNorm = 138.4719, GNorm = 0.5127, lr_0 = 7.1521e-04
Loss = 6.2713e-02, PNorm = 138.5295, GNorm = 0.6324, lr_0 = 7.1472e-04
Loss = 7.9095e-02, PNorm = 138.5843, GNorm = 0.4246, lr_0 = 7.1423e-04
Loss = 8.1234e-02, PNorm = 138.6439, GNorm = 0.5809, lr_0 = 7.1374e-04
Loss = 6.6316e-02, PNorm = 138.7005, GNorm = 0.6244, lr_0 = 7.1325e-04
Loss = 6.6952e-02, PNorm = 138.7542, GNorm = 0.8613, lr_0 = 7.1277e-04
Loss = 6.2212e-02, PNorm = 138.8111, GNorm = 0.6338, lr_0 = 7.1228e-04
Loss = 5.9934e-02, PNorm = 138.8689, GNorm = 0.6340, lr_0 = 7.1179e-04
Loss = 5.0413e-02, PNorm = 138.9211, GNorm = 0.4124, lr_0 = 7.1130e-04
Loss = 6.7151e-02, PNorm = 138.9783, GNorm = 0.7165, lr_0 = 7.1081e-04
Loss = 5.7486e-02, PNorm = 139.0350, GNorm = 0.4282, lr_0 = 7.1033e-04
Loss = 6.2541e-02, PNorm = 139.0956, GNorm = 0.4560, lr_0 = 7.0984e-04
Loss = 5.1472e-02, PNorm = 139.1486, GNorm = 0.4970, lr_0 = 7.0935e-04
Loss = 6.0848e-02, PNorm = 139.2006, GNorm = 0.4731, lr_0 = 7.0887e-04
Loss = 6.2023e-02, PNorm = 139.2479, GNorm = 0.5182, lr_0 = 7.0838e-04
Loss = 5.2107e-02, PNorm = 139.2993, GNorm = 0.4225, lr_0 = 7.0790e-04
Loss = 6.3999e-02, PNorm = 139.3575, GNorm = 0.6333, lr_0 = 7.0741e-04
Loss = 6.3574e-02, PNorm = 139.4074, GNorm = 0.5734, lr_0 = 7.0693e-04
Loss = 6.1603e-02, PNorm = 139.4647, GNorm = 0.5677, lr_0 = 7.0644e-04
Loss = 6.2852e-02, PNorm = 139.5213, GNorm = 0.6080, lr_0 = 7.0596e-04
Loss = 6.0732e-02, PNorm = 139.5727, GNorm = 0.3522, lr_0 = 7.0548e-04
Loss = 6.0444e-02, PNorm = 139.6225, GNorm = 0.4371, lr_0 = 7.0499e-04
Loss = 7.4255e-02, PNorm = 139.6764, GNorm = 1.0969, lr_0 = 7.0451e-04
Loss = 6.6179e-02, PNorm = 139.7386, GNorm = 0.8959, lr_0 = 7.0403e-04
Loss = 6.8202e-02, PNorm = 139.8061, GNorm = 0.4869, lr_0 = 7.0354e-04
Loss = 6.8725e-02, PNorm = 139.8645, GNorm = 0.6479, lr_0 = 7.0306e-04
Loss = 5.9124e-02, PNorm = 139.9308, GNorm = 0.4290, lr_0 = 7.0258e-04
Loss = 5.7275e-02, PNorm = 139.9881, GNorm = 0.3056, lr_0 = 7.0210e-04
Loss = 5.5456e-02, PNorm = 140.0456, GNorm = 0.9364, lr_0 = 7.0162e-04
Loss = 7.5661e-02, PNorm = 140.1089, GNorm = 0.6425, lr_0 = 7.0114e-04
Loss = 6.2444e-02, PNorm = 140.1731, GNorm = 0.3580, lr_0 = 7.0066e-04
Loss = 6.9979e-02, PNorm = 140.2318, GNorm = 0.5631, lr_0 = 7.0018e-04
Loss = 7.0588e-02, PNorm = 140.2945, GNorm = 0.6630, lr_0 = 6.9970e-04
Loss = 5.2056e-02, PNorm = 140.3549, GNorm = 0.4489, lr_0 = 6.9922e-04
Loss = 6.6621e-02, PNorm = 140.4083, GNorm = 0.6161, lr_0 = 6.9874e-04
Loss = 7.3456e-02, PNorm = 140.4660, GNorm = 0.6570, lr_0 = 6.9826e-04
Loss = 5.5869e-02, PNorm = 140.5303, GNorm = 0.5409, lr_0 = 6.9778e-04
Loss = 5.7863e-02, PNorm = 140.5861, GNorm = 0.7285, lr_0 = 6.9730e-04
Loss = 5.7291e-02, PNorm = 140.6433, GNorm = 0.5809, lr_0 = 6.9683e-04
Loss = 5.8129e-02, PNorm = 140.6924, GNorm = 1.1027, lr_0 = 6.9635e-04
Loss = 5.7071e-02, PNorm = 140.7528, GNorm = 0.6763, lr_0 = 6.9587e-04
Loss = 5.5811e-02, PNorm = 140.8079, GNorm = 0.3861, lr_0 = 6.9540e-04
Loss = 5.6679e-02, PNorm = 140.8630, GNorm = 0.4518, lr_0 = 6.9492e-04
Loss = 5.2429e-02, PNorm = 140.9151, GNorm = 0.6417, lr_0 = 6.9444e-04
Loss = 6.6016e-02, PNorm = 140.9692, GNorm = 0.4513, lr_0 = 6.9397e-04
Loss = 5.1344e-02, PNorm = 141.0218, GNorm = 0.5389, lr_0 = 6.9349e-04
Loss = 7.3471e-02, PNorm = 141.0696, GNorm = 0.6047, lr_0 = 6.9302e-04
Loss = 8.7333e-02, PNorm = 141.1254, GNorm = 1.3021, lr_0 = 6.9254e-04
Loss = 5.2242e-02, PNorm = 141.1866, GNorm = 0.5087, lr_0 = 6.9207e-04
Loss = 5.5859e-02, PNorm = 141.2471, GNorm = 0.7319, lr_0 = 6.9159e-04
Loss = 6.6307e-02, PNorm = 141.3036, GNorm = 0.6490, lr_0 = 6.9112e-04
Loss = 6.8040e-02, PNorm = 141.3681, GNorm = 0.5328, lr_0 = 6.9065e-04
Loss = 5.9912e-02, PNorm = 141.4296, GNorm = 0.7092, lr_0 = 6.9017e-04
Loss = 6.4827e-02, PNorm = 141.4900, GNorm = 0.4537, lr_0 = 6.8970e-04
Loss = 6.3171e-02, PNorm = 141.5472, GNorm = 0.7837, lr_0 = 6.8923e-04
Loss = 6.1287e-02, PNorm = 141.6074, GNorm = 0.4040, lr_0 = 6.8876e-04
Loss = 7.1238e-02, PNorm = 141.6713, GNorm = 0.5856, lr_0 = 6.8828e-04
Loss = 6.5681e-02, PNorm = 141.7375, GNorm = 0.5898, lr_0 = 6.8781e-04
Loss = 5.7600e-02, PNorm = 141.7989, GNorm = 0.4230, lr_0 = 6.8734e-04
Loss = 6.1100e-02, PNorm = 141.8583, GNorm = 0.7023, lr_0 = 6.8687e-04
Loss = 5.7587e-02, PNorm = 141.9169, GNorm = 0.5061, lr_0 = 6.8640e-04
Loss = 5.8387e-02, PNorm = 141.9768, GNorm = 0.4627, lr_0 = 6.8593e-04
Loss = 5.8655e-02, PNorm = 142.0366, GNorm = 0.5648, lr_0 = 6.8546e-04
Loss = 6.7347e-02, PNorm = 142.0979, GNorm = 0.4570, lr_0 = 6.8499e-04
Loss = 6.5711e-02, PNorm = 142.1577, GNorm = 0.4495, lr_0 = 6.8452e-04
Loss = 6.7544e-02, PNorm = 142.2195, GNorm = 0.3631, lr_0 = 6.8405e-04
Loss = 7.2730e-02, PNorm = 142.2820, GNorm = 0.5516, lr_0 = 6.8358e-04
Loss = 7.9264e-02, PNorm = 142.3502, GNorm = 1.3992, lr_0 = 6.8312e-04
Loss = 6.8838e-02, PNorm = 142.4238, GNorm = 0.7799, lr_0 = 6.8265e-04
Loss = 6.4125e-02, PNorm = 142.4922, GNorm = 0.6913, lr_0 = 6.8218e-04
Loss = 6.3042e-02, PNorm = 142.5568, GNorm = 0.7164, lr_0 = 6.8171e-04
Loss = 7.3492e-02, PNorm = 142.6146, GNorm = 0.4645, lr_0 = 6.8125e-04
Loss = 7.4901e-02, PNorm = 142.6776, GNorm = 0.3689, lr_0 = 6.8078e-04
Loss = 5.5348e-02, PNorm = 142.7453, GNorm = 0.5085, lr_0 = 6.8031e-04
Loss = 7.1054e-02, PNorm = 142.8022, GNorm = 0.4914, lr_0 = 6.7985e-04
Loss = 6.6601e-02, PNorm = 142.8668, GNorm = 0.4779, lr_0 = 6.7938e-04
Loss = 7.0633e-02, PNorm = 142.9321, GNorm = 0.9688, lr_0 = 6.7892e-04
Loss = 6.1692e-02, PNorm = 143.0047, GNorm = 0.6813, lr_0 = 6.7845e-04
Loss = 5.7821e-02, PNorm = 143.0655, GNorm = 0.7879, lr_0 = 6.7799e-04
Loss = 6.3567e-02, PNorm = 143.1236, GNorm = 0.8250, lr_0 = 6.7752e-04
Loss = 7.2919e-02, PNorm = 143.1876, GNorm = 0.5824, lr_0 = 6.7706e-04
Loss = 6.9512e-02, PNorm = 143.2551, GNorm = 0.3571, lr_0 = 6.7659e-04
Loss = 6.8686e-02, PNorm = 143.3242, GNorm = 0.5129, lr_0 = 6.7613e-04
Loss = 7.8462e-02, PNorm = 143.3944, GNorm = 0.9390, lr_0 = 6.7567e-04
Loss = 6.2878e-02, PNorm = 143.4705, GNorm = 0.4639, lr_0 = 6.7520e-04
Loss = 7.1241e-02, PNorm = 143.5346, GNorm = 0.8295, lr_0 = 6.7474e-04
Loss = 7.6470e-02, PNorm = 143.6022, GNorm = 0.5106, lr_0 = 6.7428e-04
Loss = 6.2882e-02, PNorm = 143.6717, GNorm = 0.5581, lr_0 = 6.7382e-04
Loss = 7.7739e-02, PNorm = 143.7426, GNorm = 0.5166, lr_0 = 6.7335e-04
Loss = 6.6871e-02, PNorm = 143.8207, GNorm = 0.3977, lr_0 = 6.7289e-04
Loss = 7.1126e-02, PNorm = 143.8915, GNorm = 0.7786, lr_0 = 6.7243e-04
Loss = 8.1519e-02, PNorm = 143.9508, GNorm = 1.0659, lr_0 = 6.7197e-04
Loss = 7.1054e-02, PNorm = 144.0280, GNorm = 0.5382, lr_0 = 6.7151e-04
Loss = 5.9491e-02, PNorm = 144.0969, GNorm = 0.7419, lr_0 = 6.7105e-04
Loss = 6.6324e-02, PNorm = 144.1626, GNorm = 0.4629, lr_0 = 6.7059e-04
Loss = 7.4000e-02, PNorm = 144.2379, GNorm = 0.6048, lr_0 = 6.7013e-04
Loss = 7.1206e-02, PNorm = 144.3129, GNorm = 0.7455, lr_0 = 6.6967e-04
Loss = 5.6599e-02, PNorm = 144.3867, GNorm = 0.3531, lr_0 = 6.6921e-04
Loss = 8.0695e-02, PNorm = 144.4530, GNorm = 0.5039, lr_0 = 6.6876e-04
Loss = 7.5002e-02, PNorm = 144.5203, GNorm = 0.4186, lr_0 = 6.6830e-04
Loss = 6.7983e-02, PNorm = 144.5930, GNorm = 0.3502, lr_0 = 6.6784e-04
Loss = 5.8780e-02, PNorm = 144.6607, GNorm = 0.6462, lr_0 = 6.6738e-04
Loss = 6.6368e-02, PNorm = 144.7251, GNorm = 0.4839, lr_0 = 6.6693e-04
Loss = 6.0810e-02, PNorm = 144.7929, GNorm = 0.3650, lr_0 = 6.6647e-04
Loss = 6.8991e-02, PNorm = 144.8604, GNorm = 1.2125, lr_0 = 6.6601e-04
Loss = 8.4566e-02, PNorm = 144.9307, GNorm = 0.4883, lr_0 = 6.6556e-04
Loss = 8.3116e-02, PNorm = 145.0081, GNorm = 0.6284, lr_0 = 6.6510e-04
Loss = 8.3891e-02, PNorm = 145.0841, GNorm = 0.6556, lr_0 = 6.6464e-04
Loss = 7.5391e-02, PNorm = 145.1610, GNorm = 0.4871, lr_0 = 6.6419e-04
Loss = 8.5270e-02, PNorm = 145.2389, GNorm = 0.6381, lr_0 = 6.6373e-04
Loss = 7.8629e-02, PNorm = 145.3153, GNorm = 0.5255, lr_0 = 6.6328e-04
Loss = 8.2984e-02, PNorm = 145.3909, GNorm = 0.5242, lr_0 = 6.6282e-04
Validation mae = 0.123374
Epoch 7
Loss = 6.0311e-02, PNorm = 145.4650, GNorm = 0.9999, lr_0 = 6.6237e-04
Loss = 5.2585e-02, PNorm = 145.5267, GNorm = 0.3906, lr_0 = 6.6192e-04
Loss = 5.4160e-02, PNorm = 145.5806, GNorm = 0.6175, lr_0 = 6.6146e-04
Loss = 4.9566e-02, PNorm = 145.6284, GNorm = 0.5498, lr_0 = 6.6101e-04
Loss = 5.2057e-02, PNorm = 145.6751, GNorm = 0.4564, lr_0 = 6.6056e-04
Loss = 5.4755e-02, PNorm = 145.7187, GNorm = 0.9230, lr_0 = 6.6011e-04
Loss = 5.4553e-02, PNorm = 145.7558, GNorm = 0.4559, lr_0 = 6.5965e-04
Loss = 5.1384e-02, PNorm = 145.8033, GNorm = 0.7155, lr_0 = 6.5920e-04
Loss = 5.8425e-02, PNorm = 145.8505, GNorm = 0.3587, lr_0 = 6.5875e-04
Loss = 4.7966e-02, PNorm = 145.8980, GNorm = 0.5361, lr_0 = 6.5830e-04
Loss = 5.4194e-02, PNorm = 145.9477, GNorm = 0.4134, lr_0 = 6.5785e-04
Loss = 4.0470e-02, PNorm = 145.9941, GNorm = 0.3437, lr_0 = 6.5740e-04
Loss = 4.5733e-02, PNorm = 146.0393, GNorm = 0.4341, lr_0 = 6.5695e-04
Loss = 4.7200e-02, PNorm = 146.0814, GNorm = 0.5098, lr_0 = 6.5650e-04
Loss = 5.1279e-02, PNorm = 146.1251, GNorm = 0.4437, lr_0 = 6.5605e-04
Loss = 5.3988e-02, PNorm = 146.1729, GNorm = 0.5061, lr_0 = 6.5560e-04
Loss = 4.5503e-02, PNorm = 146.2185, GNorm = 0.3353, lr_0 = 6.5515e-04
Loss = 4.0505e-02, PNorm = 146.2647, GNorm = 0.4070, lr_0 = 6.5470e-04
Loss = 4.6211e-02, PNorm = 146.3092, GNorm = 1.0428, lr_0 = 6.5425e-04
Loss = 4.9088e-02, PNorm = 146.3467, GNorm = 0.5278, lr_0 = 6.5380e-04
Loss = 4.8908e-02, PNorm = 146.3983, GNorm = 0.6162, lr_0 = 6.5335e-04
Loss = 4.6762e-02, PNorm = 146.4466, GNorm = 0.3889, lr_0 = 6.5291e-04
Loss = 4.5846e-02, PNorm = 146.4927, GNorm = 0.4440, lr_0 = 6.5246e-04
Loss = 5.1992e-02, PNorm = 146.5368, GNorm = 0.6678, lr_0 = 6.5201e-04
Loss = 4.6351e-02, PNorm = 146.5786, GNorm = 0.5477, lr_0 = 6.5157e-04
Loss = 5.5947e-02, PNorm = 146.6230, GNorm = 0.6262, lr_0 = 6.5112e-04
Loss = 4.5329e-02, PNorm = 146.6710, GNorm = 0.4556, lr_0 = 6.5067e-04
Loss = 5.7400e-02, PNorm = 146.7144, GNorm = 0.3557, lr_0 = 6.5023e-04
Loss = 5.3002e-02, PNorm = 146.7645, GNorm = 0.4034, lr_0 = 6.4978e-04
Loss = 4.9001e-02, PNorm = 146.8144, GNorm = 0.4623, lr_0 = 6.4934e-04
Loss = 4.2545e-02, PNorm = 146.8640, GNorm = 0.4283, lr_0 = 6.4889e-04
Loss = 4.8532e-02, PNorm = 146.9091, GNorm = 0.7583, lr_0 = 6.4845e-04
Loss = 4.5658e-02, PNorm = 146.9574, GNorm = 0.6828, lr_0 = 6.4800e-04
Loss = 5.5299e-02, PNorm = 147.0068, GNorm = 0.6733, lr_0 = 6.4756e-04
Loss = 4.7087e-02, PNorm = 147.0563, GNorm = 0.4485, lr_0 = 6.4712e-04
Loss = 6.0478e-02, PNorm = 147.1099, GNorm = 0.5102, lr_0 = 6.4667e-04
Loss = 5.1600e-02, PNorm = 147.1610, GNorm = 0.9363, lr_0 = 6.4623e-04
Loss = 4.6951e-02, PNorm = 147.2143, GNorm = 0.4584, lr_0 = 6.4579e-04
Loss = 4.5841e-02, PNorm = 147.2631, GNorm = 0.5037, lr_0 = 6.4534e-04
Loss = 5.0990e-02, PNorm = 147.3071, GNorm = 0.6577, lr_0 = 6.4490e-04
Loss = 6.6090e-02, PNorm = 147.3616, GNorm = 0.8928, lr_0 = 6.4446e-04
Loss = 4.4762e-02, PNorm = 147.4151, GNorm = 0.5984, lr_0 = 6.4402e-04
Loss = 5.7877e-02, PNorm = 147.4757, GNorm = 0.4119, lr_0 = 6.4358e-04
Loss = 5.7852e-02, PNorm = 147.5289, GNorm = 0.6936, lr_0 = 6.4314e-04
Loss = 5.1289e-02, PNorm = 147.5818, GNorm = 0.5977, lr_0 = 6.4270e-04
Loss = 4.9136e-02, PNorm = 147.6354, GNorm = 0.3589, lr_0 = 6.4226e-04
Loss = 5.3454e-02, PNorm = 147.6893, GNorm = 0.4638, lr_0 = 6.4182e-04
Loss = 4.3581e-02, PNorm = 147.7450, GNorm = 0.5737, lr_0 = 6.4138e-04
Loss = 4.4553e-02, PNorm = 147.7903, GNorm = 0.3814, lr_0 = 6.4094e-04
Loss = 4.1930e-02, PNorm = 147.8398, GNorm = 0.2811, lr_0 = 6.4050e-04
Loss = 4.0754e-02, PNorm = 147.8851, GNorm = 0.5135, lr_0 = 6.4006e-04
Loss = 3.9361e-02, PNorm = 147.9338, GNorm = 0.4216, lr_0 = 6.3962e-04
Loss = 4.8510e-02, PNorm = 147.9810, GNorm = 0.6788, lr_0 = 6.3918e-04
Loss = 4.5959e-02, PNorm = 148.0303, GNorm = 0.3315, lr_0 = 6.3874e-04
Loss = 4.3483e-02, PNorm = 148.0751, GNorm = 0.2982, lr_0 = 6.3831e-04
Loss = 4.7973e-02, PNorm = 148.1299, GNorm = 0.3200, lr_0 = 6.3787e-04
Loss = 4.5659e-02, PNorm = 148.1797, GNorm = 0.4357, lr_0 = 6.3743e-04
Loss = 6.1055e-02, PNorm = 148.2316, GNorm = 0.6643, lr_0 = 6.3700e-04
Loss = 5.1505e-02, PNorm = 148.2919, GNorm = 0.7167, lr_0 = 6.3656e-04
Loss = 5.5464e-02, PNorm = 148.3391, GNorm = 0.3710, lr_0 = 6.3612e-04
Loss = 4.3550e-02, PNorm = 148.3951, GNorm = 0.4082, lr_0 = 6.3569e-04
Loss = 5.0595e-02, PNorm = 148.4476, GNorm = 0.8482, lr_0 = 6.3525e-04
Loss = 5.1420e-02, PNorm = 148.4943, GNorm = 0.2703, lr_0 = 6.3482e-04
Loss = 5.1605e-02, PNorm = 148.5461, GNorm = 0.4093, lr_0 = 6.3438e-04
Loss = 4.9294e-02, PNorm = 148.5999, GNorm = 0.6590, lr_0 = 6.3395e-04
Loss = 5.3115e-02, PNorm = 148.6532, GNorm = 0.5353, lr_0 = 6.3351e-04
Loss = 5.7692e-02, PNorm = 148.7084, GNorm = 0.4086, lr_0 = 6.3308e-04
Loss = 4.7364e-02, PNorm = 148.7651, GNorm = 0.7915, lr_0 = 6.3265e-04
Loss = 5.0454e-02, PNorm = 148.8201, GNorm = 0.3573, lr_0 = 6.3221e-04
Loss = 4.7850e-02, PNorm = 148.8754, GNorm = 0.5488, lr_0 = 6.3178e-04
Loss = 5.3624e-02, PNorm = 148.9283, GNorm = 0.7370, lr_0 = 6.3135e-04
Loss = 4.7225e-02, PNorm = 148.9807, GNorm = 0.5282, lr_0 = 6.3091e-04
Loss = 5.0932e-02, PNorm = 149.0295, GNorm = 0.7457, lr_0 = 6.3048e-04
Loss = 5.3998e-02, PNorm = 149.0833, GNorm = 0.3820, lr_0 = 6.3005e-04
Loss = 4.3426e-02, PNorm = 149.1334, GNorm = 0.2863, lr_0 = 6.2962e-04
Loss = 4.4168e-02, PNorm = 149.1825, GNorm = 0.3665, lr_0 = 6.2919e-04
Loss = 6.3009e-02, PNorm = 149.2338, GNorm = 1.1049, lr_0 = 6.2876e-04
Loss = 5.5223e-02, PNorm = 149.2946, GNorm = 0.3955, lr_0 = 6.2833e-04
Loss = 5.4679e-02, PNorm = 149.3519, GNorm = 1.2473, lr_0 = 6.2789e-04
Loss = 4.5796e-02, PNorm = 149.4067, GNorm = 0.3295, lr_0 = 6.2746e-04
Loss = 5.4982e-02, PNorm = 149.4602, GNorm = 0.5793, lr_0 = 6.2703e-04
Loss = 4.8558e-02, PNorm = 149.5164, GNorm = 0.5307, lr_0 = 6.2661e-04
Loss = 5.2443e-02, PNorm = 149.5702, GNorm = 0.5490, lr_0 = 6.2618e-04
Loss = 6.1103e-02, PNorm = 149.6279, GNorm = 0.5757, lr_0 = 6.2575e-04
Loss = 4.4425e-02, PNorm = 149.6872, GNorm = 0.5442, lr_0 = 6.2532e-04
Loss = 5.3589e-02, PNorm = 149.7393, GNorm = 0.4013, lr_0 = 6.2489e-04
Loss = 5.1256e-02, PNorm = 149.7989, GNorm = 0.3311, lr_0 = 6.2446e-04
Loss = 4.6172e-02, PNorm = 149.8501, GNorm = 0.3883, lr_0 = 6.2403e-04
Loss = 5.3245e-02, PNorm = 149.9093, GNorm = 0.8215, lr_0 = 6.2361e-04
Loss = 6.5994e-02, PNorm = 149.9643, GNorm = 0.6952, lr_0 = 6.2318e-04
Loss = 4.9169e-02, PNorm = 150.0236, GNorm = 0.4802, lr_0 = 6.2275e-04
Loss = 5.8798e-02, PNorm = 150.0846, GNorm = 0.3995, lr_0 = 6.2233e-04
Loss = 4.3597e-02, PNorm = 150.1461, GNorm = 0.3311, lr_0 = 6.2190e-04
Loss = 5.5045e-02, PNorm = 150.2064, GNorm = 0.3322, lr_0 = 6.2147e-04
Loss = 5.0590e-02, PNorm = 150.2646, GNorm = 0.4617, lr_0 = 6.2105e-04
Loss = 5.2868e-02, PNorm = 150.3285, GNorm = 0.4080, lr_0 = 6.2062e-04
Loss = 5.0327e-02, PNorm = 150.3898, GNorm = 0.3094, lr_0 = 6.2020e-04
Loss = 5.4379e-02, PNorm = 150.4550, GNorm = 0.3686, lr_0 = 6.1977e-04
Loss = 4.6141e-02, PNorm = 150.5120, GNorm = 0.4542, lr_0 = 6.1935e-04
Loss = 5.3175e-02, PNorm = 150.5611, GNorm = 0.2798, lr_0 = 6.1892e-04
Loss = 5.4089e-02, PNorm = 150.6182, GNorm = 0.3406, lr_0 = 6.1850e-04
Loss = 7.1710e-02, PNorm = 150.6866, GNorm = 0.3910, lr_0 = 6.1808e-04
Loss = 5.5927e-02, PNorm = 150.7455, GNorm = 0.5971, lr_0 = 6.1765e-04
Loss = 5.9603e-02, PNorm = 150.8027, GNorm = 0.6861, lr_0 = 6.1723e-04
Loss = 5.8378e-02, PNorm = 150.8650, GNorm = 0.4922, lr_0 = 6.1681e-04
Loss = 5.4883e-02, PNorm = 150.9326, GNorm = 0.6521, lr_0 = 6.1638e-04
Loss = 4.9209e-02, PNorm = 150.9983, GNorm = 0.4699, lr_0 = 6.1596e-04
Loss = 5.7794e-02, PNorm = 151.0639, GNorm = 0.5760, lr_0 = 6.1554e-04
Loss = 5.0304e-02, PNorm = 151.1302, GNorm = 0.5230, lr_0 = 6.1512e-04
Loss = 5.9753e-02, PNorm = 151.1903, GNorm = 0.6196, lr_0 = 6.1470e-04
Loss = 4.7099e-02, PNorm = 151.2536, GNorm = 0.6382, lr_0 = 6.1428e-04
Loss = 6.3884e-02, PNorm = 151.3148, GNorm = 0.6310, lr_0 = 6.1385e-04
Loss = 5.3584e-02, PNorm = 151.3857, GNorm = 0.4357, lr_0 = 6.1343e-04
Loss = 5.6435e-02, PNorm = 151.4502, GNorm = 0.3503, lr_0 = 6.1301e-04
Loss = 6.4143e-02, PNorm = 151.5156, GNorm = 0.3971, lr_0 = 6.1259e-04
Loss = 5.8497e-02, PNorm = 151.5803, GNorm = 0.7654, lr_0 = 6.1217e-04
Loss = 5.2787e-02, PNorm = 151.6455, GNorm = 0.4345, lr_0 = 6.1175e-04
Loss = 5.7102e-02, PNorm = 151.7055, GNorm = 0.2918, lr_0 = 6.1134e-04
Loss = 5.8927e-02, PNorm = 151.7680, GNorm = 0.9413, lr_0 = 6.1092e-04
Loss = 5.3801e-02, PNorm = 151.8308, GNorm = 0.5619, lr_0 = 6.1050e-04
Validation mae = 0.123088
Epoch 8
Loss = 4.7112e-02, PNorm = 151.8829, GNorm = 0.5200, lr_0 = 6.1008e-04
Loss = 4.4799e-02, PNorm = 151.9307, GNorm = 0.2986, lr_0 = 6.0966e-04
Loss = 4.2005e-02, PNorm = 151.9735, GNorm = 0.2636, lr_0 = 6.0924e-04
Loss = 5.0060e-02, PNorm = 152.0122, GNorm = 0.9822, lr_0 = 6.0883e-04
Loss = 3.9257e-02, PNorm = 152.0627, GNorm = 0.3389, lr_0 = 6.0841e-04
Loss = 4.7504e-02, PNorm = 152.1071, GNorm = 0.4218, lr_0 = 6.0799e-04
Loss = 3.6708e-02, PNorm = 152.1540, GNorm = 0.3520, lr_0 = 6.0758e-04
Loss = 3.9122e-02, PNorm = 152.1939, GNorm = 0.6737, lr_0 = 6.0716e-04
Loss = 4.8366e-02, PNorm = 152.2318, GNorm = 0.3113, lr_0 = 6.0674e-04
Loss = 4.5424e-02, PNorm = 152.2705, GNorm = 0.4005, lr_0 = 6.0633e-04
Loss = 5.0143e-02, PNorm = 152.3171, GNorm = 0.8082, lr_0 = 6.0591e-04
Loss = 4.9770e-02, PNorm = 152.3636, GNorm = 0.2835, lr_0 = 6.0550e-04
Loss = 3.5141e-02, PNorm = 152.4105, GNorm = 0.4535, lr_0 = 6.0508e-04
Loss = 3.9584e-02, PNorm = 152.4500, GNorm = 0.2959, lr_0 = 6.0467e-04
Loss = 4.2376e-02, PNorm = 152.4895, GNorm = 0.5647, lr_0 = 6.0425e-04
Loss = 3.9590e-02, PNorm = 152.5217, GNorm = 0.3245, lr_0 = 6.0384e-04
Loss = 4.2958e-02, PNorm = 152.5609, GNorm = 0.4669, lr_0 = 6.0343e-04
Loss = 4.1859e-02, PNorm = 152.6029, GNorm = 0.4423, lr_0 = 6.0301e-04
Loss = 3.9675e-02, PNorm = 152.6454, GNorm = 0.3977, lr_0 = 6.0260e-04
Loss = 3.4301e-02, PNorm = 152.6892, GNorm = 0.2960, lr_0 = 6.0219e-04
Loss = 4.0993e-02, PNorm = 152.7283, GNorm = 0.3911, lr_0 = 6.0178e-04
Loss = 3.4913e-02, PNorm = 152.7592, GNorm = 0.3709, lr_0 = 6.0136e-04
Loss = 4.4330e-02, PNorm = 152.7919, GNorm = 0.6401, lr_0 = 6.0095e-04
Loss = 3.2445e-02, PNorm = 152.8306, GNorm = 0.6176, lr_0 = 6.0054e-04
Loss = 4.7181e-02, PNorm = 152.8675, GNorm = 0.6206, lr_0 = 6.0013e-04
Loss = 3.9234e-02, PNorm = 152.9103, GNorm = 0.6329, lr_0 = 5.9972e-04
Loss = 4.0050e-02, PNorm = 152.9501, GNorm = 0.3715, lr_0 = 5.9931e-04
Loss = 4.5728e-02, PNorm = 152.9926, GNorm = 0.3955, lr_0 = 5.9890e-04
Loss = 3.4541e-02, PNorm = 153.0349, GNorm = 0.5762, lr_0 = 5.9849e-04
Loss = 4.7482e-02, PNorm = 153.0793, GNorm = 0.4680, lr_0 = 5.9808e-04
Loss = 3.5195e-02, PNorm = 153.1231, GNorm = 0.2576, lr_0 = 5.9767e-04
Loss = 3.3480e-02, PNorm = 153.1631, GNorm = 0.5109, lr_0 = 5.9726e-04
Loss = 3.6205e-02, PNorm = 153.2025, GNorm = 0.6532, lr_0 = 5.9685e-04
Loss = 4.5790e-02, PNorm = 153.2445, GNorm = 0.4263, lr_0 = 5.9644e-04
Loss = 3.8769e-02, PNorm = 153.2798, GNorm = 0.6264, lr_0 = 5.9603e-04
Loss = 3.6756e-02, PNorm = 153.3230, GNorm = 0.4798, lr_0 = 5.9562e-04
Loss = 5.2592e-02, PNorm = 153.3626, GNorm = 0.4343, lr_0 = 5.9521e-04
Loss = 3.5580e-02, PNorm = 153.4051, GNorm = 0.5859, lr_0 = 5.9481e-04
Loss = 4.0444e-02, PNorm = 153.4472, GNorm = 0.2850, lr_0 = 5.9440e-04
Loss = 3.4184e-02, PNorm = 153.4911, GNorm = 0.3338, lr_0 = 5.9399e-04
Loss = 4.5063e-02, PNorm = 153.5329, GNorm = 0.3642, lr_0 = 5.9358e-04
Loss = 4.0570e-02, PNorm = 153.5776, GNorm = 0.5917, lr_0 = 5.9318e-04
Loss = 3.2349e-02, PNorm = 153.6245, GNorm = 0.3945, lr_0 = 5.9277e-04
Loss = 3.4259e-02, PNorm = 153.6691, GNorm = 0.3821, lr_0 = 5.9236e-04
Loss = 3.2888e-02, PNorm = 153.7101, GNorm = 0.4286, lr_0 = 5.9196e-04
Loss = 3.9045e-02, PNorm = 153.7486, GNorm = 0.2225, lr_0 = 5.9155e-04
Loss = 4.0382e-02, PNorm = 153.7878, GNorm = 0.5277, lr_0 = 5.9115e-04
Loss = 4.3340e-02, PNorm = 153.8348, GNorm = 0.4787, lr_0 = 5.9074e-04
Loss = 4.9306e-02, PNorm = 153.8806, GNorm = 0.2450, lr_0 = 5.9034e-04
Loss = 3.4713e-02, PNorm = 153.9303, GNorm = 0.3289, lr_0 = 5.8993e-04
Loss = 3.4221e-02, PNorm = 153.9738, GNorm = 0.3871, lr_0 = 5.8953e-04
Loss = 3.6989e-02, PNorm = 154.0129, GNorm = 0.3806, lr_0 = 5.8913e-04
Loss = 5.9775e-02, PNorm = 154.0612, GNorm = 0.9392, lr_0 = 5.8872e-04
Loss = 4.0948e-02, PNorm = 154.1114, GNorm = 0.3700, lr_0 = 5.8832e-04
Loss = 3.3679e-02, PNorm = 154.1575, GNorm = 0.2583, lr_0 = 5.8792e-04
Loss = 3.1659e-02, PNorm = 154.2004, GNorm = 0.3809, lr_0 = 5.8751e-04
Loss = 4.7320e-02, PNorm = 154.2474, GNorm = 0.3094, lr_0 = 5.8711e-04
Loss = 3.4936e-02, PNorm = 154.2923, GNorm = 0.4439, lr_0 = 5.8671e-04
Loss = 4.6685e-02, PNorm = 154.3357, GNorm = 0.4378, lr_0 = 5.8631e-04
Loss = 4.0900e-02, PNorm = 154.3785, GNorm = 0.4993, lr_0 = 5.8591e-04
Loss = 3.7177e-02, PNorm = 154.4243, GNorm = 0.4027, lr_0 = 5.8550e-04
Loss = 4.4188e-02, PNorm = 154.4742, GNorm = 1.2950, lr_0 = 5.8510e-04
Loss = 3.7051e-02, PNorm = 154.5226, GNorm = 0.5309, lr_0 = 5.8470e-04
Loss = 4.3311e-02, PNorm = 154.5735, GNorm = 0.3215, lr_0 = 5.8430e-04
Loss = 4.2358e-02, PNorm = 154.6156, GNorm = 0.5426, lr_0 = 5.8390e-04
Loss = 3.8778e-02, PNorm = 154.6579, GNorm = 0.3210, lr_0 = 5.8350e-04
Loss = 4.0494e-02, PNorm = 154.7030, GNorm = 0.7246, lr_0 = 5.8310e-04
Loss = 4.1779e-02, PNorm = 154.7496, GNorm = 0.3645, lr_0 = 5.8270e-04
Loss = 4.4479e-02, PNorm = 154.7946, GNorm = 0.2804, lr_0 = 5.8230e-04
Loss = 3.6809e-02, PNorm = 154.8414, GNorm = 0.5596, lr_0 = 5.8190e-04
Loss = 3.8706e-02, PNorm = 154.8898, GNorm = 0.3845, lr_0 = 5.8151e-04
Loss = 3.5245e-02, PNorm = 154.9390, GNorm = 0.3244, lr_0 = 5.8111e-04
Loss = 3.9551e-02, PNorm = 154.9864, GNorm = 0.6409, lr_0 = 5.8071e-04
Loss = 3.9050e-02, PNorm = 155.0311, GNorm = 0.4098, lr_0 = 5.8031e-04
Loss = 4.4755e-02, PNorm = 155.0775, GNorm = 0.7991, lr_0 = 5.7991e-04
Loss = 3.6789e-02, PNorm = 155.1272, GNorm = 0.5395, lr_0 = 5.7952e-04
Loss = 3.5917e-02, PNorm = 155.1741, GNorm = 0.3579, lr_0 = 5.7912e-04
Loss = 3.8395e-02, PNorm = 155.2233, GNorm = 0.6538, lr_0 = 5.7872e-04
Loss = 3.8708e-02, PNorm = 155.2652, GNorm = 0.8513, lr_0 = 5.7833e-04
Loss = 3.0729e-02, PNorm = 155.3123, GNorm = 0.2956, lr_0 = 5.7793e-04
Loss = 3.6275e-02, PNorm = 155.3598, GNorm = 0.4704, lr_0 = 5.7753e-04
Loss = 3.8926e-02, PNorm = 155.4053, GNorm = 0.2593, lr_0 = 5.7714e-04
Loss = 5.1719e-02, PNorm = 155.4465, GNorm = 0.8503, lr_0 = 5.7674e-04
Loss = 3.5837e-02, PNorm = 155.4917, GNorm = 0.3247, lr_0 = 5.7635e-04
Loss = 3.8613e-02, PNorm = 155.5384, GNorm = 0.5606, lr_0 = 5.7595e-04
Loss = 4.0713e-02, PNorm = 155.5887, GNorm = 0.7355, lr_0 = 5.7556e-04
Loss = 4.6313e-02, PNorm = 155.6365, GNorm = 0.4820, lr_0 = 5.7516e-04
Loss = 5.2610e-02, PNorm = 155.6854, GNorm = 0.6465, lr_0 = 5.7477e-04
Loss = 4.7444e-02, PNorm = 155.7422, GNorm = 0.5906, lr_0 = 5.7438e-04
Loss = 4.1234e-02, PNorm = 155.7986, GNorm = 0.6452, lr_0 = 5.7398e-04
Loss = 4.2429e-02, PNorm = 155.8543, GNorm = 0.3383, lr_0 = 5.7359e-04
Loss = 5.0099e-02, PNorm = 155.9083, GNorm = 1.1589, lr_0 = 5.7320e-04
Loss = 3.7036e-02, PNorm = 155.9634, GNorm = 0.6706, lr_0 = 5.7280e-04
Loss = 4.0376e-02, PNorm = 156.0196, GNorm = 0.5478, lr_0 = 5.7241e-04
Loss = 5.0572e-02, PNorm = 156.0721, GNorm = 0.4327, lr_0 = 5.7202e-04
Loss = 4.3746e-02, PNorm = 156.1201, GNorm = 0.3398, lr_0 = 5.7163e-04
Loss = 4.7350e-02, PNorm = 156.1726, GNorm = 0.8222, lr_0 = 5.7124e-04
Loss = 3.6795e-02, PNorm = 156.2218, GNorm = 0.4980, lr_0 = 5.7084e-04
Loss = 3.4656e-02, PNorm = 156.2711, GNorm = 0.2792, lr_0 = 5.7045e-04
Loss = 5.8326e-02, PNorm = 156.3232, GNorm = 0.3541, lr_0 = 5.7006e-04
Loss = 4.4932e-02, PNorm = 156.3754, GNorm = 0.6916, lr_0 = 5.6967e-04
Loss = 4.5255e-02, PNorm = 156.4291, GNorm = 0.2331, lr_0 = 5.6928e-04
Loss = 4.6540e-02, PNorm = 156.4752, GNorm = 0.3393, lr_0 = 5.6889e-04
Loss = 4.5333e-02, PNorm = 156.5269, GNorm = 0.4833, lr_0 = 5.6850e-04
Loss = 3.5906e-02, PNorm = 156.5783, GNorm = 0.6177, lr_0 = 5.6811e-04
Loss = 4.4297e-02, PNorm = 156.6287, GNorm = 0.6782, lr_0 = 5.6772e-04
Loss = 3.9076e-02, PNorm = 156.6832, GNorm = 0.4748, lr_0 = 5.6733e-04
Loss = 4.4845e-02, PNorm = 156.7321, GNorm = 0.3306, lr_0 = 5.6695e-04
Loss = 3.2130e-02, PNorm = 156.7861, GNorm = 0.2854, lr_0 = 5.6656e-04
Loss = 4.3505e-02, PNorm = 156.8403, GNorm = 0.4630, lr_0 = 5.6617e-04
Loss = 4.0233e-02, PNorm = 156.8932, GNorm = 0.2991, lr_0 = 5.6578e-04
Loss = 5.0493e-02, PNorm = 156.9449, GNorm = 0.3425, lr_0 = 5.6539e-04
Loss = 4.5172e-02, PNorm = 156.9956, GNorm = 0.4245, lr_0 = 5.6501e-04
Loss = 5.3871e-02, PNorm = 157.0494, GNorm = 0.4168, lr_0 = 5.6462e-04
Loss = 4.0842e-02, PNorm = 157.1062, GNorm = 0.2804, lr_0 = 5.6423e-04
Loss = 5.0044e-02, PNorm = 157.1582, GNorm = 0.4915, lr_0 = 5.6385e-04
Loss = 4.2920e-02, PNorm = 157.2071, GNorm = 1.2802, lr_0 = 5.6346e-04
Loss = 4.5708e-02, PNorm = 157.2563, GNorm = 0.3388, lr_0 = 5.6307e-04
Loss = 3.3262e-02, PNorm = 157.3080, GNorm = 0.3728, lr_0 = 5.6269e-04
Loss = 4.7803e-02, PNorm = 157.3552, GNorm = 0.4520, lr_0 = 5.6230e-04
Validation mae = 0.122316
Epoch 9
Loss = 3.2084e-02, PNorm = 157.3948, GNorm = 0.2470, lr_0 = 5.6192e-04
Loss = 3.7681e-02, PNorm = 157.4275, GNorm = 0.3255, lr_0 = 5.6153e-04
Loss = 3.2237e-02, PNorm = 157.4607, GNorm = 0.3605, lr_0 = 5.6115e-04
Loss = 3.0031e-02, PNorm = 157.4948, GNorm = 0.4028, lr_0 = 5.6076e-04
Loss = 3.5457e-02, PNorm = 157.5289, GNorm = 0.5695, lr_0 = 5.6038e-04
Loss = 3.3183e-02, PNorm = 157.5651, GNorm = 0.3193, lr_0 = 5.6000e-04
Loss = 3.0444e-02, PNorm = 157.5991, GNorm = 0.4209, lr_0 = 5.5961e-04
Loss = 3.2512e-02, PNorm = 157.6322, GNorm = 0.3491, lr_0 = 5.5923e-04
Loss = 2.7510e-02, PNorm = 157.6657, GNorm = 0.9849, lr_0 = 5.5885e-04
Loss = 3.9266e-02, PNorm = 157.7049, GNorm = 0.3481, lr_0 = 5.5846e-04
Loss = 2.7324e-02, PNorm = 157.7353, GNorm = 0.5014, lr_0 = 5.5808e-04
Loss = 3.5644e-02, PNorm = 157.7725, GNorm = 0.7066, lr_0 = 5.5770e-04
Loss = 3.4116e-02, PNorm = 157.7995, GNorm = 0.5083, lr_0 = 5.5732e-04
Loss = 3.3312e-02, PNorm = 157.8321, GNorm = 0.4519, lr_0 = 5.5693e-04
Loss = 3.1644e-02, PNorm = 157.8654, GNorm = 0.5316, lr_0 = 5.5655e-04
Loss = 3.6579e-02, PNorm = 157.8932, GNorm = 0.4919, lr_0 = 5.5617e-04
Loss = 3.7261e-02, PNorm = 157.9250, GNorm = 0.2272, lr_0 = 5.5579e-04
Loss = 3.1777e-02, PNorm = 157.9537, GNorm = 0.3352, lr_0 = 5.5541e-04
Loss = 3.6268e-02, PNorm = 157.9919, GNorm = 0.1685, lr_0 = 5.5503e-04
Loss = 3.0151e-02, PNorm = 158.0265, GNorm = 0.3152, lr_0 = 5.5465e-04
Loss = 3.5736e-02, PNorm = 158.0592, GNorm = 0.2200, lr_0 = 5.5427e-04
Loss = 3.8605e-02, PNorm = 158.0976, GNorm = 0.3040, lr_0 = 5.5389e-04
Loss = 3.9036e-02, PNorm = 158.1350, GNorm = 0.5981, lr_0 = 5.5351e-04
Loss = 3.4746e-02, PNorm = 158.1720, GNorm = 0.3262, lr_0 = 5.5313e-04
Loss = 3.3992e-02, PNorm = 158.2113, GNorm = 1.0863, lr_0 = 5.5275e-04
Loss = 2.6858e-02, PNorm = 158.2440, GNorm = 0.3986, lr_0 = 5.5237e-04
Loss = 4.4254e-02, PNorm = 158.2737, GNorm = 0.3349, lr_0 = 5.5199e-04
Loss = 4.2110e-02, PNorm = 158.3103, GNorm = 1.7297, lr_0 = 5.5162e-04
Loss = 2.6313e-02, PNorm = 158.3475, GNorm = 0.2592, lr_0 = 5.5124e-04
Loss = 3.8273e-02, PNorm = 158.3818, GNorm = 1.0888, lr_0 = 5.5086e-04
Loss = 2.9853e-02, PNorm = 158.4209, GNorm = 0.9916, lr_0 = 5.5048e-04
Loss = 3.2175e-02, PNorm = 158.4527, GNorm = 0.4775, lr_0 = 5.5011e-04
Loss = 3.1635e-02, PNorm = 158.4904, GNorm = 0.2835, lr_0 = 5.4973e-04
Loss = 2.6868e-02, PNorm = 158.5272, GNorm = 0.4056, lr_0 = 5.4935e-04
Loss = 3.4057e-02, PNorm = 158.5519, GNorm = 0.5365, lr_0 = 5.4898e-04
Loss = 2.9022e-02, PNorm = 158.5861, GNorm = 0.3880, lr_0 = 5.4860e-04
Loss = 3.3270e-02, PNorm = 158.6160, GNorm = 0.2983, lr_0 = 5.4822e-04
Loss = 3.6172e-02, PNorm = 158.6496, GNorm = 0.2299, lr_0 = 5.4785e-04
Loss = 2.6907e-02, PNorm = 158.6881, GNorm = 0.4793, lr_0 = 5.4747e-04
Loss = 3.1738e-02, PNorm = 158.7197, GNorm = 0.6082, lr_0 = 5.4710e-04
Loss = 4.5762e-02, PNorm = 158.7534, GNorm = 0.4160, lr_0 = 5.4672e-04
Loss = 3.1169e-02, PNorm = 158.7879, GNorm = 0.3599, lr_0 = 5.4635e-04
Loss = 3.2866e-02, PNorm = 158.8216, GNorm = 0.7381, lr_0 = 5.4597e-04
Loss = 3.4343e-02, PNorm = 158.8540, GNorm = 0.2740, lr_0 = 5.4560e-04
Loss = 3.6344e-02, PNorm = 158.8947, GNorm = 0.7601, lr_0 = 5.4523e-04
Loss = 3.1574e-02, PNorm = 158.9332, GNorm = 0.3062, lr_0 = 5.4485e-04
Loss = 3.6352e-02, PNorm = 158.9706, GNorm = 0.4816, lr_0 = 5.4448e-04
Loss = 2.9938e-02, PNorm = 159.0079, GNorm = 0.4183, lr_0 = 5.4411e-04
Loss = 2.4021e-02, PNorm = 159.0460, GNorm = 0.7096, lr_0 = 5.4373e-04
Loss = 2.8350e-02, PNorm = 159.0725, GNorm = 0.4184, lr_0 = 5.4336e-04
Loss = 2.9523e-02, PNorm = 159.1075, GNorm = 0.4069, lr_0 = 5.4299e-04
Loss = 2.8471e-02, PNorm = 159.1449, GNorm = 0.5050, lr_0 = 5.4262e-04
Loss = 3.5190e-02, PNorm = 159.1842, GNorm = 0.3519, lr_0 = 5.4225e-04
Loss = 3.5929e-02, PNorm = 159.2255, GNorm = 0.4317, lr_0 = 5.4187e-04
Loss = 3.0285e-02, PNorm = 159.2633, GNorm = 0.3244, lr_0 = 5.4150e-04
Loss = 2.6698e-02, PNorm = 159.3029, GNorm = 0.4036, lr_0 = 5.4113e-04
Loss = 3.4526e-02, PNorm = 159.3429, GNorm = 0.3619, lr_0 = 5.4076e-04
Loss = 3.1197e-02, PNorm = 159.3827, GNorm = 0.2546, lr_0 = 5.4039e-04
Loss = 3.1063e-02, PNorm = 159.4191, GNorm = 0.3981, lr_0 = 5.4002e-04
Loss = 3.0277e-02, PNorm = 159.4520, GNorm = 0.2844, lr_0 = 5.3965e-04
Loss = 3.5656e-02, PNorm = 159.4864, GNorm = 0.3318, lr_0 = 5.3928e-04
Loss = 3.0989e-02, PNorm = 159.5240, GNorm = 0.3534, lr_0 = 5.3891e-04
Loss = 3.8047e-02, PNorm = 159.5619, GNorm = 0.3019, lr_0 = 5.3854e-04
Loss = 2.6471e-02, PNorm = 159.5983, GNorm = 0.2383, lr_0 = 5.3817e-04
Loss = 3.3423e-02, PNorm = 159.6375, GNorm = 0.7098, lr_0 = 5.3781e-04
Loss = 2.8398e-02, PNorm = 159.6727, GNorm = 0.2511, lr_0 = 5.3744e-04
Loss = 3.4098e-02, PNorm = 159.7108, GNorm = 0.6686, lr_0 = 5.3707e-04
Loss = 3.1181e-02, PNorm = 159.7468, GNorm = 0.5322, lr_0 = 5.3670e-04
Loss = 3.7890e-02, PNorm = 159.7908, GNorm = 0.3901, lr_0 = 5.3633e-04
Loss = 2.9582e-02, PNorm = 159.8357, GNorm = 0.3794, lr_0 = 5.3597e-04
Loss = 3.6063e-02, PNorm = 159.8781, GNorm = 0.3804, lr_0 = 5.3560e-04
Loss = 2.6189e-02, PNorm = 159.9189, GNorm = 0.4432, lr_0 = 5.3523e-04
Loss = 6.3731e-02, PNorm = 159.9597, GNorm = 0.3757, lr_0 = 5.3486e-04
Loss = 3.4678e-02, PNorm = 159.9972, GNorm = 0.5303, lr_0 = 5.3450e-04
Loss = 3.1010e-02, PNorm = 160.0403, GNorm = 0.4603, lr_0 = 5.3413e-04
Loss = 3.1294e-02, PNorm = 160.0837, GNorm = 0.2007, lr_0 = 5.3377e-04
Loss = 4.5877e-02, PNorm = 160.1220, GNorm = 0.3537, lr_0 = 5.3340e-04
Loss = 3.1746e-02, PNorm = 160.1668, GNorm = 0.4639, lr_0 = 5.3304e-04
Loss = 3.6687e-02, PNorm = 160.2075, GNorm = 0.7056, lr_0 = 5.3267e-04
Loss = 4.2085e-02, PNorm = 160.2481, GNorm = 0.4230, lr_0 = 5.3231e-04
Loss = 3.2659e-02, PNorm = 160.2796, GNorm = 0.3112, lr_0 = 5.3194e-04
Loss = 4.1634e-02, PNorm = 160.3231, GNorm = 0.6175, lr_0 = 5.3158e-04
Loss = 3.5312e-02, PNorm = 160.3654, GNorm = 0.3743, lr_0 = 5.3121e-04
Loss = 3.4404e-02, PNorm = 160.4012, GNorm = 0.2941, lr_0 = 5.3085e-04
Loss = 3.6417e-02, PNorm = 160.4430, GNorm = 0.3663, lr_0 = 5.3048e-04
Loss = 3.8828e-02, PNorm = 160.4874, GNorm = 0.4575, lr_0 = 5.3012e-04
Loss = 2.6740e-02, PNorm = 160.5297, GNorm = 0.3173, lr_0 = 5.2976e-04
Loss = 3.4346e-02, PNorm = 160.5731, GNorm = 0.3977, lr_0 = 5.2939e-04
Loss = 3.6724e-02, PNorm = 160.6141, GNorm = 0.3481, lr_0 = 5.2903e-04
Loss = 3.7942e-02, PNorm = 160.6530, GNorm = 0.4683, lr_0 = 5.2867e-04
Loss = 2.7741e-02, PNorm = 160.6865, GNorm = 0.4522, lr_0 = 5.2831e-04
Loss = 3.3449e-02, PNorm = 160.7270, GNorm = 0.4281, lr_0 = 5.2795e-04
Loss = 2.9540e-02, PNorm = 160.7705, GNorm = 0.3903, lr_0 = 5.2758e-04
Loss = 2.6583e-02, PNorm = 160.8081, GNorm = 0.3264, lr_0 = 5.2722e-04
Loss = 2.9550e-02, PNorm = 160.8454, GNorm = 0.7705, lr_0 = 5.2686e-04
Loss = 4.0239e-02, PNorm = 160.8885, GNorm = 0.5438, lr_0 = 5.2650e-04
Loss = 3.2830e-02, PNorm = 160.9316, GNorm = 0.3939, lr_0 = 5.2614e-04
Loss = 3.6803e-02, PNorm = 160.9720, GNorm = 0.5517, lr_0 = 5.2578e-04
Loss = 3.2648e-02, PNorm = 161.0163, GNorm = 0.5077, lr_0 = 5.2542e-04
Loss = 3.5651e-02, PNorm = 161.0574, GNorm = 0.2602, lr_0 = 5.2506e-04
Loss = 3.6071e-02, PNorm = 161.0963, GNorm = 0.7288, lr_0 = 5.2470e-04
Loss = 2.4517e-02, PNorm = 161.1360, GNorm = 0.4162, lr_0 = 5.2434e-04
Loss = 2.8888e-02, PNorm = 161.1788, GNorm = 0.3486, lr_0 = 5.2398e-04
Loss = 3.4660e-02, PNorm = 161.2169, GNorm = 0.3981, lr_0 = 5.2362e-04
Loss = 3.9289e-02, PNorm = 161.2591, GNorm = 0.3419, lr_0 = 5.2326e-04
Loss = 4.0174e-02, PNorm = 161.3028, GNorm = 0.5406, lr_0 = 5.2290e-04
Loss = 2.9925e-02, PNorm = 161.3386, GNorm = 0.3196, lr_0 = 5.2255e-04
Loss = 3.5676e-02, PNorm = 161.3844, GNorm = 0.4496, lr_0 = 5.2219e-04
Loss = 4.0558e-02, PNorm = 161.4268, GNorm = 0.2595, lr_0 = 5.2183e-04
Loss = 3.4857e-02, PNorm = 161.4699, GNorm = 0.3472, lr_0 = 5.2147e-04
Loss = 3.4222e-02, PNorm = 161.5102, GNorm = 0.3051, lr_0 = 5.2112e-04
Loss = 2.8250e-02, PNorm = 161.5526, GNorm = 0.3905, lr_0 = 5.2076e-04
Loss = 3.3532e-02, PNorm = 161.5900, GNorm = 0.6295, lr_0 = 5.2040e-04
Loss = 3.0871e-02, PNorm = 161.6327, GNorm = 0.5732, lr_0 = 5.2005e-04
Loss = 2.8468e-02, PNorm = 161.6706, GNorm = 1.2937, lr_0 = 5.1969e-04
Loss = 3.3731e-02, PNorm = 161.7035, GNorm = 0.6888, lr_0 = 5.1933e-04
Loss = 3.5407e-02, PNorm = 161.7442, GNorm = 0.2937, lr_0 = 5.1898e-04
Loss = 3.2608e-02, PNorm = 161.7822, GNorm = 0.3450, lr_0 = 5.1862e-04
Loss = 3.2903e-02, PNorm = 161.8193, GNorm = 0.3928, lr_0 = 5.1827e-04
Loss = 2.9920e-02, PNorm = 161.8624, GNorm = 0.5159, lr_0 = 5.1791e-04
Validation mae = 0.123090
Epoch 10
Loss = 3.4680e-02, PNorm = 161.8990, GNorm = 0.3178, lr_0 = 5.1756e-04
Loss = 2.6555e-02, PNorm = 161.9263, GNorm = 0.4541, lr_0 = 5.1720e-04
Loss = 2.9477e-02, PNorm = 161.9611, GNorm = 0.3699, lr_0 = 5.1685e-04
Loss = 3.1329e-02, PNorm = 161.9895, GNorm = 0.2827, lr_0 = 5.1649e-04
Loss = 2.7060e-02, PNorm = 162.0218, GNorm = 0.3038, lr_0 = 5.1614e-04
Loss = 2.5851e-02, PNorm = 162.0510, GNorm = 0.3994, lr_0 = 5.1579e-04
Loss = 3.0026e-02, PNorm = 162.0808, GNorm = 0.2253, lr_0 = 5.1543e-04
Loss = 2.8442e-02, PNorm = 162.1096, GNorm = 0.2864, lr_0 = 5.1508e-04
Loss = 2.7124e-02, PNorm = 162.1350, GNorm = 0.3068, lr_0 = 5.1473e-04
Loss = 2.6223e-02, PNorm = 162.1602, GNorm = 0.3451, lr_0 = 5.1437e-04
Loss = 2.5645e-02, PNorm = 162.1835, GNorm = 0.5068, lr_0 = 5.1402e-04
Loss = 2.6406e-02, PNorm = 162.2112, GNorm = 0.2502, lr_0 = 5.1367e-04
Loss = 2.9964e-02, PNorm = 162.2396, GNorm = 0.3816, lr_0 = 5.1332e-04
Loss = 3.2041e-02, PNorm = 162.2691, GNorm = 0.3949, lr_0 = 5.1297e-04
Loss = 2.7545e-02, PNorm = 162.2983, GNorm = 0.2213, lr_0 = 5.1262e-04
Loss = 2.6004e-02, PNorm = 162.3255, GNorm = 0.2316, lr_0 = 5.1226e-04
Loss = 3.0191e-02, PNorm = 162.3568, GNorm = 0.4053, lr_0 = 5.1191e-04
Loss = 2.5008e-02, PNorm = 162.3878, GNorm = 0.2267, lr_0 = 5.1156e-04
Loss = 2.3844e-02, PNorm = 162.4139, GNorm = 0.3168, lr_0 = 5.1121e-04
Loss = 2.4762e-02, PNorm = 162.4428, GNorm = 0.2721, lr_0 = 5.1086e-04
Loss = 2.5167e-02, PNorm = 162.4691, GNorm = 0.3476, lr_0 = 5.1051e-04
Loss = 2.3430e-02, PNorm = 162.4971, GNorm = 0.2450, lr_0 = 5.1016e-04
Loss = 3.4217e-02, PNorm = 162.5252, GNorm = 0.6065, lr_0 = 5.0981e-04
Loss = 2.5076e-02, PNorm = 162.5532, GNorm = 0.4000, lr_0 = 5.0946e-04
Loss = 2.8485e-02, PNorm = 162.5859, GNorm = 0.3777, lr_0 = 5.0911e-04
Loss = 2.8223e-02, PNorm = 162.6150, GNorm = 0.4727, lr_0 = 5.0877e-04
Loss = 2.7666e-02, PNorm = 162.6435, GNorm = 0.3490, lr_0 = 5.0842e-04
Loss = 2.9599e-02, PNorm = 162.6704, GNorm = 0.4358, lr_0 = 5.0807e-04
Loss = 2.6308e-02, PNorm = 162.6967, GNorm = 0.3444, lr_0 = 5.0772e-04
Loss = 2.7630e-02, PNorm = 162.7263, GNorm = 0.4669, lr_0 = 5.0737e-04
Loss = 2.8751e-02, PNorm = 162.7454, GNorm = 0.4137, lr_0 = 5.0703e-04
Loss = 2.8123e-02, PNorm = 162.7750, GNorm = 0.2951, lr_0 = 5.0668e-04
Loss = 2.9534e-02, PNorm = 162.8118, GNorm = 0.4120, lr_0 = 5.0633e-04
Loss = 2.7064e-02, PNorm = 162.8421, GNorm = 0.6032, lr_0 = 5.0598e-04
Loss = 3.2582e-02, PNorm = 162.8716, GNorm = 0.3384, lr_0 = 5.0564e-04
Loss = 3.4074e-02, PNorm = 162.8997, GNorm = 0.9698, lr_0 = 5.0529e-04
Loss = 3.1222e-02, PNorm = 162.9317, GNorm = 0.4985, lr_0 = 5.0494e-04
Loss = 2.1870e-02, PNorm = 162.9638, GNorm = 0.4162, lr_0 = 5.0460e-04
Loss = 2.8511e-02, PNorm = 162.9981, GNorm = 0.3233, lr_0 = 5.0425e-04
Loss = 2.3050e-02, PNorm = 163.0286, GNorm = 0.4590, lr_0 = 5.0391e-04
Loss = 2.5433e-02, PNorm = 163.0583, GNorm = 0.4017, lr_0 = 5.0356e-04
Loss = 2.0990e-02, PNorm = 163.0863, GNorm = 0.2060, lr_0 = 5.0322e-04
Loss = 2.4205e-02, PNorm = 163.1194, GNorm = 0.4194, lr_0 = 5.0287e-04
Loss = 2.8181e-02, PNorm = 163.1455, GNorm = 0.5858, lr_0 = 5.0253e-04
Loss = 3.0472e-02, PNorm = 163.1790, GNorm = 0.5912, lr_0 = 5.0218e-04
Loss = 3.2656e-02, PNorm = 163.2077, GNorm = 0.5996, lr_0 = 5.0184e-04
Loss = 2.3668e-02, PNorm = 163.2394, GNorm = 0.4535, lr_0 = 5.0150e-04
Loss = 2.7281e-02, PNorm = 163.2641, GNorm = 0.1917, lr_0 = 5.0115e-04
Loss = 2.4748e-02, PNorm = 163.2893, GNorm = 0.4865, lr_0 = 5.0081e-04
Loss = 2.8711e-02, PNorm = 163.3216, GNorm = 0.3517, lr_0 = 5.0047e-04
Loss = 2.5613e-02, PNorm = 163.3502, GNorm = 0.3738, lr_0 = 5.0012e-04
Loss = 2.7954e-02, PNorm = 163.3863, GNorm = 0.4751, lr_0 = 4.9978e-04
Loss = 2.6549e-02, PNorm = 163.4213, GNorm = 0.3320, lr_0 = 4.9944e-04
Loss = 2.7805e-02, PNorm = 163.4493, GNorm = 0.3501, lr_0 = 4.9910e-04
Loss = 2.4393e-02, PNorm = 163.4787, GNorm = 0.3727, lr_0 = 4.9875e-04
Loss = 3.1344e-02, PNorm = 163.5070, GNorm = 0.3792, lr_0 = 4.9841e-04
Loss = 2.5689e-02, PNorm = 163.5395, GNorm = 0.3810, lr_0 = 4.9807e-04
Loss = 2.7773e-02, PNorm = 163.5729, GNorm = 0.3076, lr_0 = 4.9773e-04
Loss = 2.3540e-02, PNorm = 163.6065, GNorm = 0.4033, lr_0 = 4.9739e-04
Loss = 2.5024e-02, PNorm = 163.6355, GNorm = 0.4592, lr_0 = 4.9705e-04
Loss = 2.6341e-02, PNorm = 163.6676, GNorm = 0.5885, lr_0 = 4.9671e-04
Loss = 2.4412e-02, PNorm = 163.7011, GNorm = 0.3050, lr_0 = 4.9637e-04
Loss = 2.8932e-02, PNorm = 163.7293, GNorm = 0.2557, lr_0 = 4.9603e-04
Loss = 2.8536e-02, PNorm = 163.7627, GNorm = 0.4097, lr_0 = 4.9569e-04
Loss = 2.4661e-02, PNorm = 163.7966, GNorm = 0.4464, lr_0 = 4.9535e-04
Loss = 2.5209e-02, PNorm = 163.8325, GNorm = 0.2380, lr_0 = 4.9501e-04
Loss = 5.5532e-02, PNorm = 163.8702, GNorm = 2.1104, lr_0 = 4.9467e-04
Loss = 2.4688e-02, PNorm = 163.9032, GNorm = 0.2142, lr_0 = 4.9433e-04
Loss = 2.8802e-02, PNorm = 163.9381, GNorm = 0.2548, lr_0 = 4.9399e-04
Loss = 2.1846e-02, PNorm = 163.9726, GNorm = 0.4184, lr_0 = 4.9365e-04
Loss = 3.0462e-02, PNorm = 164.0071, GNorm = 0.2156, lr_0 = 4.9332e-04
Loss = 2.8486e-02, PNorm = 164.0419, GNorm = 0.5491, lr_0 = 4.9298e-04
Loss = 2.6012e-02, PNorm = 164.0760, GNorm = 0.2298, lr_0 = 4.9264e-04
Loss = 2.4792e-02, PNorm = 164.1077, GNorm = 0.3107, lr_0 = 4.9230e-04
Loss = 2.2309e-02, PNorm = 164.1415, GNorm = 0.2255, lr_0 = 4.9197e-04
Loss = 2.4049e-02, PNorm = 164.1704, GNorm = 0.5263, lr_0 = 4.9163e-04
Loss = 2.2735e-02, PNorm = 164.2014, GNorm = 0.2550, lr_0 = 4.9129e-04
Loss = 2.2764e-02, PNorm = 164.2342, GNorm = 0.1843, lr_0 = 4.9095e-04
Loss = 2.7221e-02, PNorm = 164.2677, GNorm = 0.3267, lr_0 = 4.9062e-04
Loss = 2.4351e-02, PNorm = 164.3042, GNorm = 0.3407, lr_0 = 4.9028e-04
Loss = 2.5198e-02, PNorm = 164.3381, GNorm = 0.3651, lr_0 = 4.8995e-04
Loss = 2.7810e-02, PNorm = 164.3657, GNorm = 0.3048, lr_0 = 4.8961e-04
Loss = 2.3861e-02, PNorm = 164.3999, GNorm = 0.4490, lr_0 = 4.8928e-04
Loss = 3.6655e-02, PNorm = 164.4265, GNorm = 0.5316, lr_0 = 4.8894e-04
Loss = 2.7433e-02, PNorm = 164.4572, GNorm = 0.3472, lr_0 = 4.8861e-04
Loss = 2.5341e-02, PNorm = 164.4881, GNorm = 0.3291, lr_0 = 4.8827e-04
Loss = 2.1242e-02, PNorm = 164.5208, GNorm = 0.3104, lr_0 = 4.8794e-04
Loss = 3.0905e-02, PNorm = 164.5567, GNorm = 0.3954, lr_0 = 4.8760e-04
Loss = 2.3011e-02, PNorm = 164.5860, GNorm = 0.1953, lr_0 = 4.8727e-04
Loss = 2.6052e-02, PNorm = 164.6142, GNorm = 0.4608, lr_0 = 4.8693e-04
Loss = 2.5260e-02, PNorm = 164.6452, GNorm = 0.2573, lr_0 = 4.8660e-04
Loss = 2.5912e-02, PNorm = 164.6786, GNorm = 0.2723, lr_0 = 4.8627e-04
Loss = 2.6863e-02, PNorm = 164.7119, GNorm = 0.3769, lr_0 = 4.8593e-04
Loss = 1.9953e-02, PNorm = 164.7478, GNorm = 0.3523, lr_0 = 4.8560e-04
Loss = 2.3783e-02, PNorm = 164.7784, GNorm = 0.3431, lr_0 = 4.8527e-04
Loss = 3.9303e-02, PNorm = 164.8098, GNorm = 0.4156, lr_0 = 4.8494e-04
Loss = 4.0678e-02, PNorm = 164.8477, GNorm = 0.2785, lr_0 = 4.8460e-04
Loss = 2.7705e-02, PNorm = 164.8868, GNorm = 0.5328, lr_0 = 4.8427e-04
Loss = 2.5514e-02, PNorm = 164.9223, GNorm = 0.3016, lr_0 = 4.8394e-04
Loss = 2.6189e-02, PNorm = 164.9576, GNorm = 0.4523, lr_0 = 4.8361e-04
Loss = 2.6185e-02, PNorm = 164.9864, GNorm = 0.4779, lr_0 = 4.8328e-04
Loss = 2.5183e-02, PNorm = 165.0219, GNorm = 0.3596, lr_0 = 4.8295e-04
Loss = 3.0032e-02, PNorm = 165.0600, GNorm = 0.5155, lr_0 = 4.8262e-04
Loss = 2.6205e-02, PNorm = 165.0940, GNorm = 0.2813, lr_0 = 4.8228e-04
Loss = 2.5910e-02, PNorm = 165.1323, GNorm = 0.3734, lr_0 = 4.8195e-04
Loss = 2.7098e-02, PNorm = 165.1659, GNorm = 0.5847, lr_0 = 4.8162e-04
Loss = 2.8378e-02, PNorm = 165.1960, GNorm = 0.5106, lr_0 = 4.8129e-04
Loss = 3.0956e-02, PNorm = 165.2260, GNorm = 0.5856, lr_0 = 4.8096e-04
Loss = 3.2399e-02, PNorm = 165.2685, GNorm = 0.6272, lr_0 = 4.8064e-04
Loss = 2.8396e-02, PNorm = 165.3043, GNorm = 0.2979, lr_0 = 4.8031e-04
Loss = 2.4912e-02, PNorm = 165.3409, GNorm = 0.3394, lr_0 = 4.7998e-04
Loss = 2.7967e-02, PNorm = 165.3756, GNorm = 0.3235, lr_0 = 4.7965e-04
Loss = 2.6222e-02, PNorm = 165.4090, GNorm = 0.2360, lr_0 = 4.7932e-04
Loss = 3.4409e-02, PNorm = 165.4498, GNorm = 0.3407, lr_0 = 4.7899e-04
Loss = 2.9523e-02, PNorm = 165.4928, GNorm = 0.4187, lr_0 = 4.7866e-04
Loss = 2.8994e-02, PNorm = 165.5303, GNorm = 0.4046, lr_0 = 4.7833e-04
Loss = 2.6433e-02, PNorm = 165.5665, GNorm = 0.2931, lr_0 = 4.7801e-04
Loss = 2.9258e-02, PNorm = 165.6065, GNorm = 0.3273, lr_0 = 4.7768e-04
Loss = 3.4065e-02, PNorm = 165.6430, GNorm = 0.4233, lr_0 = 4.7735e-04
Loss = 2.6229e-02, PNorm = 165.6788, GNorm = 0.6467, lr_0 = 4.7703e-04
Validation mae = 0.122499
Epoch 11
Loss = 2.2002e-02, PNorm = 165.7102, GNorm = 0.2538, lr_0 = 4.7670e-04
Loss = 2.5990e-02, PNorm = 165.7348, GNorm = 0.2457, lr_0 = 4.7637e-04
Loss = 2.6788e-02, PNorm = 165.7596, GNorm = 0.2634, lr_0 = 4.7605e-04
Loss = 2.6820e-02, PNorm = 165.7780, GNorm = 0.3704, lr_0 = 4.7572e-04
Loss = 3.2205e-02, PNorm = 165.8042, GNorm = 0.5027, lr_0 = 4.7539e-04
Loss = 2.5451e-02, PNorm = 165.8259, GNorm = 0.3447, lr_0 = 4.7507e-04
Loss = 2.0811e-02, PNorm = 165.8486, GNorm = 0.4096, lr_0 = 4.7474e-04
Loss = 2.8273e-02, PNorm = 165.8705, GNorm = 0.3501, lr_0 = 4.7442e-04
Loss = 2.1066e-02, PNorm = 165.8978, GNorm = 0.2100, lr_0 = 4.7409e-04
Loss = 2.3172e-02, PNorm = 165.9275, GNorm = 0.4345, lr_0 = 4.7377e-04
Loss = 2.3387e-02, PNorm = 165.9478, GNorm = 0.2488, lr_0 = 4.7344e-04
Loss = 2.3361e-02, PNorm = 165.9686, GNorm = 0.7143, lr_0 = 4.7312e-04
Loss = 2.0709e-02, PNorm = 165.9899, GNorm = 0.3884, lr_0 = 4.7279e-04
Loss = 2.1316e-02, PNorm = 166.0150, GNorm = 0.4887, lr_0 = 4.7247e-04
Loss = 2.5503e-02, PNorm = 166.0364, GNorm = 0.5240, lr_0 = 4.7215e-04
Loss = 2.0921e-02, PNorm = 166.0587, GNorm = 0.2164, lr_0 = 4.7182e-04
Loss = 2.1081e-02, PNorm = 166.0821, GNorm = 0.3601, lr_0 = 4.7150e-04
Loss = 2.2844e-02, PNorm = 166.1069, GNorm = 0.5339, lr_0 = 4.7118e-04
Loss = 1.9193e-02, PNorm = 166.1277, GNorm = 0.2637, lr_0 = 4.7085e-04
Loss = 2.2822e-02, PNorm = 166.1541, GNorm = 0.5331, lr_0 = 4.7053e-04
Loss = 2.4528e-02, PNorm = 166.1796, GNorm = 0.3029, lr_0 = 4.7021e-04
Loss = 2.1144e-02, PNorm = 166.2022, GNorm = 0.5929, lr_0 = 4.6989e-04
Loss = 2.2655e-02, PNorm = 166.2289, GNorm = 0.3802, lr_0 = 4.6957e-04
Loss = 2.0348e-02, PNorm = 166.2556, GNorm = 0.1909, lr_0 = 4.6924e-04
Loss = 2.2856e-02, PNorm = 166.2818, GNorm = 0.2646, lr_0 = 4.6892e-04
Loss = 2.2405e-02, PNorm = 166.3076, GNorm = 0.2596, lr_0 = 4.6860e-04
Loss = 1.7642e-02, PNorm = 166.3320, GNorm = 0.2503, lr_0 = 4.6828e-04
Loss = 2.1669e-02, PNorm = 166.3574, GNorm = 0.2211, lr_0 = 4.6796e-04
Loss = 1.8241e-02, PNorm = 166.3771, GNorm = 0.3106, lr_0 = 4.6764e-04
Loss = 2.3710e-02, PNorm = 166.3989, GNorm = 0.5319, lr_0 = 4.6732e-04
Loss = 2.0657e-02, PNorm = 166.4259, GNorm = 0.4944, lr_0 = 4.6700e-04
Loss = 2.0333e-02, PNorm = 166.4485, GNorm = 0.2545, lr_0 = 4.6668e-04
Loss = 2.7608e-02, PNorm = 166.4712, GNorm = 0.4811, lr_0 = 4.6636e-04
Loss = 2.1517e-02, PNorm = 166.4943, GNorm = 0.6709, lr_0 = 4.6604e-04
Loss = 1.6385e-02, PNorm = 166.5205, GNorm = 0.1855, lr_0 = 4.6572e-04
Loss = 1.9702e-02, PNorm = 166.5438, GNorm = 0.2025, lr_0 = 4.6540e-04
Loss = 2.9335e-02, PNorm = 166.5663, GNorm = 0.2746, lr_0 = 4.6508e-04
Loss = 2.3219e-02, PNorm = 166.5918, GNorm = 0.3210, lr_0 = 4.6476e-04
Loss = 2.3987e-02, PNorm = 166.6191, GNorm = 0.3265, lr_0 = 4.6445e-04
Loss = 2.5776e-02, PNorm = 166.6495, GNorm = 0.4360, lr_0 = 4.6413e-04
Loss = 2.3818e-02, PNorm = 166.6746, GNorm = 0.3040, lr_0 = 4.6381e-04
Loss = 2.8131e-02, PNorm = 166.7043, GNorm = 0.2527, lr_0 = 4.6349e-04
Loss = 2.2350e-02, PNorm = 166.7305, GNorm = 0.5524, lr_0 = 4.6317e-04
Loss = 2.1949e-02, PNorm = 166.7545, GNorm = 0.4567, lr_0 = 4.6286e-04
Loss = 2.1157e-02, PNorm = 166.7821, GNorm = 0.3949, lr_0 = 4.6254e-04
Loss = 2.0458e-02, PNorm = 166.8094, GNorm = 0.2611, lr_0 = 4.6222e-04
Loss = 2.1107e-02, PNorm = 166.8355, GNorm = 0.3178, lr_0 = 4.6191e-04
Loss = 2.6352e-02, PNorm = 166.8627, GNorm = 0.3742, lr_0 = 4.6159e-04
Loss = 1.8932e-02, PNorm = 166.8865, GNorm = 0.1981, lr_0 = 4.6127e-04
Loss = 2.4370e-02, PNorm = 166.9105, GNorm = 0.4042, lr_0 = 4.6096e-04
Loss = 3.5308e-02, PNorm = 166.9304, GNorm = 0.6384, lr_0 = 4.6064e-04
Loss = 2.1977e-02, PNorm = 166.9554, GNorm = 0.4392, lr_0 = 4.6033e-04
Loss = 2.1438e-02, PNorm = 166.9822, GNorm = 0.2749, lr_0 = 4.6001e-04
Loss = 2.6466e-02, PNorm = 167.0120, GNorm = 0.5750, lr_0 = 4.5970e-04
Loss = 2.3223e-02, PNorm = 167.0413, GNorm = 0.3348, lr_0 = 4.5938e-04
Loss = 2.0431e-02, PNorm = 167.0661, GNorm = 0.2797, lr_0 = 4.5907e-04
Loss = 1.9020e-02, PNorm = 167.0914, GNorm = 0.4924, lr_0 = 4.5875e-04
Loss = 1.9840e-02, PNorm = 167.1141, GNorm = 0.4686, lr_0 = 4.5844e-04
Loss = 2.0854e-02, PNorm = 167.1460, GNorm = 0.3370, lr_0 = 4.5812e-04
Loss = 2.6260e-02, PNorm = 167.1720, GNorm = 0.5890, lr_0 = 4.5781e-04
Loss = 1.9361e-02, PNorm = 167.1965, GNorm = 0.2770, lr_0 = 4.5750e-04
Loss = 2.1261e-02, PNorm = 167.2244, GNorm = 0.4858, lr_0 = 4.5718e-04
Loss = 2.1046e-02, PNorm = 167.2505, GNorm = 0.3444, lr_0 = 4.5687e-04
Loss = 1.8569e-02, PNorm = 167.2777, GNorm = 0.2100, lr_0 = 4.5656e-04
Loss = 2.1867e-02, PNorm = 167.3027, GNorm = 0.3676, lr_0 = 4.5624e-04
Loss = 2.1246e-02, PNorm = 167.3284, GNorm = 0.3935, lr_0 = 4.5593e-04
Loss = 3.6754e-02, PNorm = 167.3566, GNorm = 0.2746, lr_0 = 4.5562e-04
Loss = 3.7061e-02, PNorm = 167.3773, GNorm = 0.4311, lr_0 = 4.5531e-04
Loss = 3.5971e-02, PNorm = 167.3999, GNorm = 0.5343, lr_0 = 4.5499e-04
Loss = 2.0953e-02, PNorm = 167.4336, GNorm = 0.2808, lr_0 = 4.5468e-04
Loss = 2.2238e-02, PNorm = 167.4668, GNorm = 0.4119, lr_0 = 4.5437e-04
Loss = 2.4974e-02, PNorm = 167.5000, GNorm = 0.4047, lr_0 = 4.5406e-04
Loss = 2.6083e-02, PNorm = 167.5329, GNorm = 0.3549, lr_0 = 4.5375e-04
Loss = 1.9454e-02, PNorm = 167.5601, GNorm = 0.5041, lr_0 = 4.5344e-04
Loss = 2.5782e-02, PNorm = 167.5895, GNorm = 0.6183, lr_0 = 4.5313e-04
Loss = 2.4930e-02, PNorm = 167.6173, GNorm = 0.5417, lr_0 = 4.5282e-04
Loss = 1.8600e-02, PNorm = 167.6447, GNorm = 0.1968, lr_0 = 4.5251e-04
Loss = 2.1022e-02, PNorm = 167.6720, GNorm = 0.6197, lr_0 = 4.5220e-04
Loss = 2.6082e-02, PNorm = 167.6992, GNorm = 0.2260, lr_0 = 4.5189e-04
Loss = 2.6389e-02, PNorm = 167.7303, GNorm = 0.3841, lr_0 = 4.5158e-04
Loss = 2.2714e-02, PNorm = 167.7565, GNorm = 0.3583, lr_0 = 4.5127e-04
Loss = 2.3235e-02, PNorm = 167.7823, GNorm = 0.2332, lr_0 = 4.5096e-04
Loss = 2.1180e-02, PNorm = 167.8109, GNorm = 0.2452, lr_0 = 4.5065e-04
Loss = 3.2280e-02, PNorm = 167.8331, GNorm = 0.2695, lr_0 = 4.5034e-04
Loss = 3.1610e-02, PNorm = 167.8613, GNorm = 0.6912, lr_0 = 4.5003e-04
Loss = 2.8977e-02, PNorm = 167.8880, GNorm = 0.4939, lr_0 = 4.4972e-04
Loss = 2.2259e-02, PNorm = 167.9183, GNorm = 0.1910, lr_0 = 4.4942e-04
Loss = 2.6295e-02, PNorm = 167.9456, GNorm = 0.2675, lr_0 = 4.4911e-04
Loss = 2.3123e-02, PNorm = 167.9724, GNorm = 0.5519, lr_0 = 4.4880e-04
Loss = 2.0080e-02, PNorm = 168.0023, GNorm = 0.1696, lr_0 = 4.4849e-04
Loss = 2.7715e-02, PNorm = 168.0345, GNorm = 0.2200, lr_0 = 4.4819e-04
Loss = 2.1717e-02, PNorm = 168.0632, GNorm = 0.5312, lr_0 = 4.4788e-04
Loss = 2.2180e-02, PNorm = 168.0953, GNorm = 0.5123, lr_0 = 4.4757e-04
Loss = 2.1868e-02, PNorm = 168.1287, GNorm = 0.2856, lr_0 = 4.4727e-04
Loss = 2.6130e-02, PNorm = 168.1586, GNorm = 0.2847, lr_0 = 4.4696e-04
Loss = 2.4220e-02, PNorm = 168.1903, GNorm = 0.1959, lr_0 = 4.4665e-04
Loss = 2.4208e-02, PNorm = 168.2244, GNorm = 0.3088, lr_0 = 4.4635e-04
Loss = 2.2565e-02, PNorm = 168.2568, GNorm = 0.2083, lr_0 = 4.4604e-04
Loss = 1.8135e-02, PNorm = 168.2879, GNorm = 0.1838, lr_0 = 4.4574e-04
Loss = 2.4981e-02, PNorm = 168.3141, GNorm = 0.2113, lr_0 = 4.4543e-04
Loss = 1.9048e-02, PNorm = 168.3427, GNorm = 0.2430, lr_0 = 4.4513e-04
Loss = 1.9247e-02, PNorm = 168.3709, GNorm = 0.3317, lr_0 = 4.4482e-04
Loss = 2.5959e-02, PNorm = 168.3965, GNorm = 0.4403, lr_0 = 4.4452e-04
Loss = 2.2371e-02, PNorm = 168.4257, GNorm = 0.5348, lr_0 = 4.4421e-04
Loss = 2.0449e-02, PNorm = 168.4522, GNorm = 0.7314, lr_0 = 4.4391e-04
Loss = 2.2893e-02, PNorm = 168.4798, GNorm = 0.4029, lr_0 = 4.4360e-04
Loss = 2.0363e-02, PNorm = 168.5092, GNorm = 0.4045, lr_0 = 4.4330e-04
Loss = 1.8901e-02, PNorm = 168.5425, GNorm = 0.2669, lr_0 = 4.4299e-04
Loss = 1.8059e-02, PNorm = 168.5705, GNorm = 0.4457, lr_0 = 4.4269e-04
Loss = 1.8031e-02, PNorm = 168.5948, GNorm = 0.2655, lr_0 = 4.4239e-04
Loss = 3.1591e-02, PNorm = 168.6244, GNorm = 0.2240, lr_0 = 4.4209e-04
Loss = 1.8512e-02, PNorm = 168.6514, GNorm = 0.3946, lr_0 = 4.4178e-04
Loss = 2.0157e-02, PNorm = 168.6803, GNorm = 0.2882, lr_0 = 4.4148e-04
Loss = 2.2699e-02, PNorm = 168.7097, GNorm = 0.3896, lr_0 = 4.4118e-04
Loss = 2.0967e-02, PNorm = 168.7393, GNorm = 0.3741, lr_0 = 4.4088e-04
Loss = 2.0650e-02, PNorm = 168.7686, GNorm = 0.2359, lr_0 = 4.4057e-04
Loss = 2.6164e-02, PNorm = 168.7962, GNorm = 0.2349, lr_0 = 4.4027e-04
Loss = 2.7688e-02, PNorm = 168.8255, GNorm = 0.2808, lr_0 = 4.3997e-04
Loss = 2.2466e-02, PNorm = 168.8530, GNorm = 0.2112, lr_0 = 4.3967e-04
Loss = 2.7505e-02, PNorm = 168.8803, GNorm = 0.9979, lr_0 = 4.3937e-04
Validation mae = 0.122215
Epoch 12
Loss = 1.6285e-02, PNorm = 168.9033, GNorm = 0.1916, lr_0 = 4.3907e-04
Loss = 2.1327e-02, PNorm = 168.9238, GNorm = 0.4158, lr_0 = 4.3877e-04
Loss = 2.6191e-02, PNorm = 168.9448, GNorm = 1.1110, lr_0 = 4.3846e-04
Loss = 2.1598e-02, PNorm = 168.9600, GNorm = 0.3842, lr_0 = 4.3816e-04
Loss = 2.6297e-02, PNorm = 168.9855, GNorm = 0.5314, lr_0 = 4.3786e-04
Loss = 3.2211e-02, PNorm = 169.0089, GNorm = 0.4270, lr_0 = 4.3756e-04
Loss = 2.2051e-02, PNorm = 169.0336, GNorm = 0.4413, lr_0 = 4.3726e-04
Loss = 2.0969e-02, PNorm = 169.0569, GNorm = 0.2323, lr_0 = 4.3696e-04
Loss = 1.9016e-02, PNorm = 169.0829, GNorm = 0.2691, lr_0 = 4.3667e-04
Loss = 1.6881e-02, PNorm = 169.1102, GNorm = 0.2071, lr_0 = 4.3637e-04
Loss = 1.8938e-02, PNorm = 169.1339, GNorm = 0.2242, lr_0 = 4.3607e-04
Loss = 2.8975e-02, PNorm = 169.1542, GNorm = 0.4005, lr_0 = 4.3577e-04
Loss = 2.2351e-02, PNorm = 169.1746, GNorm = 0.2353, lr_0 = 4.3547e-04
Loss = 1.7696e-02, PNorm = 169.1964, GNorm = 0.2725, lr_0 = 4.3517e-04
Loss = 2.5563e-02, PNorm = 169.2142, GNorm = 0.2587, lr_0 = 4.3487e-04
Loss = 2.2846e-02, PNorm = 169.2357, GNorm = 0.4533, lr_0 = 4.3458e-04
Loss = 2.4915e-02, PNorm = 169.2567, GNorm = 0.2076, lr_0 = 4.3428e-04
Loss = 1.8828e-02, PNorm = 169.2817, GNorm = 0.3889, lr_0 = 4.3398e-04
Loss = 1.4873e-02, PNorm = 169.3023, GNorm = 0.1282, lr_0 = 4.3368e-04
Loss = 1.9538e-02, PNorm = 169.3230, GNorm = 0.4047, lr_0 = 4.3339e-04
Loss = 1.5149e-02, PNorm = 169.3432, GNorm = 0.7768, lr_0 = 4.3309e-04
Loss = 1.8293e-02, PNorm = 169.3640, GNorm = 0.2267, lr_0 = 4.3279e-04
Loss = 1.9770e-02, PNorm = 169.3859, GNorm = 0.3529, lr_0 = 4.3250e-04
Loss = 2.0561e-02, PNorm = 169.4047, GNorm = 0.2208, lr_0 = 4.3220e-04
Loss = 1.7374e-02, PNorm = 169.4253, GNorm = 0.4938, lr_0 = 4.3190e-04
Loss = 1.8094e-02, PNorm = 169.4439, GNorm = 0.2699, lr_0 = 4.3161e-04
Loss = 2.1135e-02, PNorm = 169.4667, GNorm = 0.2559, lr_0 = 4.3131e-04
Loss = 2.1063e-02, PNorm = 169.4879, GNorm = 0.3050, lr_0 = 4.3102e-04
Loss = 2.7203e-02, PNorm = 169.5074, GNorm = 0.2194, lr_0 = 4.3072e-04
Loss = 1.6843e-02, PNorm = 169.5325, GNorm = 0.2468, lr_0 = 4.3043e-04
Loss = 2.2499e-02, PNorm = 169.5608, GNorm = 0.4821, lr_0 = 4.3013e-04
Loss = 1.9588e-02, PNorm = 169.5866, GNorm = 0.4150, lr_0 = 4.2984e-04
Loss = 2.0625e-02, PNorm = 169.6112, GNorm = 0.3529, lr_0 = 4.2954e-04
Loss = 2.0102e-02, PNorm = 169.6338, GNorm = 0.2859, lr_0 = 4.2925e-04
Loss = 1.5830e-02, PNorm = 169.6573, GNorm = 0.1495, lr_0 = 4.2895e-04
Loss = 1.8839e-02, PNorm = 169.6787, GNorm = 0.2586, lr_0 = 4.2866e-04
Loss = 2.0182e-02, PNorm = 169.7017, GNorm = 0.2831, lr_0 = 4.2837e-04
Loss = 1.7300e-02, PNorm = 169.7264, GNorm = 0.1511, lr_0 = 4.2807e-04
Loss = 2.0648e-02, PNorm = 169.7500, GNorm = 0.5486, lr_0 = 4.2778e-04
Loss = 1.8561e-02, PNorm = 169.7745, GNorm = 0.2660, lr_0 = 4.2749e-04
Loss = 1.7479e-02, PNorm = 169.8004, GNorm = 0.4888, lr_0 = 4.2719e-04
Loss = 1.5859e-02, PNorm = 169.8231, GNorm = 0.3976, lr_0 = 4.2690e-04
Loss = 1.9753e-02, PNorm = 169.8444, GNorm = 0.2106, lr_0 = 4.2661e-04
Loss = 1.9271e-02, PNorm = 169.8663, GNorm = 0.2721, lr_0 = 4.2632e-04
Loss = 1.3688e-02, PNorm = 169.8855, GNorm = 0.3739, lr_0 = 4.2602e-04
Loss = 1.4837e-02, PNorm = 169.9050, GNorm = 0.5119, lr_0 = 4.2573e-04
Loss = 3.3061e-02, PNorm = 169.9310, GNorm = 0.4223, lr_0 = 4.2544e-04
Loss = 1.7249e-02, PNorm = 169.9525, GNorm = 0.3828, lr_0 = 4.2515e-04
Loss = 1.6898e-02, PNorm = 169.9699, GNorm = 0.2108, lr_0 = 4.2486e-04
Loss = 2.0692e-02, PNorm = 169.9851, GNorm = 0.2289, lr_0 = 4.2457e-04
Loss = 1.9094e-02, PNorm = 170.0041, GNorm = 0.3471, lr_0 = 4.2428e-04
Loss = 2.0810e-02, PNorm = 170.0234, GNorm = 0.3295, lr_0 = 4.2399e-04
Loss = 1.6972e-02, PNorm = 170.0473, GNorm = 0.3080, lr_0 = 4.2370e-04
Loss = 1.7880e-02, PNorm = 170.0707, GNorm = 0.2245, lr_0 = 4.2340e-04
Loss = 2.0720e-02, PNorm = 170.0938, GNorm = 0.3106, lr_0 = 4.2311e-04
Loss = 1.5065e-02, PNorm = 170.1189, GNorm = 0.3724, lr_0 = 4.2283e-04
Loss = 1.6429e-02, PNorm = 170.1396, GNorm = 0.2360, lr_0 = 4.2254e-04
Loss = 2.1380e-02, PNorm = 170.1657, GNorm = 0.4543, lr_0 = 4.2225e-04
Loss = 1.9904e-02, PNorm = 170.1908, GNorm = 0.4909, lr_0 = 4.2196e-04
Loss = 1.7564e-02, PNorm = 170.2154, GNorm = 0.5912, lr_0 = 4.2167e-04
Loss = 1.9775e-02, PNorm = 170.2371, GNorm = 0.2734, lr_0 = 4.2138e-04
Loss = 1.6828e-02, PNorm = 170.2572, GNorm = 0.1912, lr_0 = 4.2109e-04
Loss = 2.1590e-02, PNorm = 170.2779, GNorm = 0.1724, lr_0 = 4.2080e-04
Loss = 2.3835e-02, PNorm = 170.2925, GNorm = 0.2054, lr_0 = 4.2051e-04
Loss = 1.6192e-02, PNorm = 170.3121, GNorm = 0.1595, lr_0 = 4.2023e-04
Loss = 1.6416e-02, PNorm = 170.3335, GNorm = 0.4839, lr_0 = 4.1994e-04
Loss = 1.8638e-02, PNorm = 170.3570, GNorm = 0.3155, lr_0 = 4.1965e-04
Loss = 2.2535e-02, PNorm = 170.3809, GNorm = 0.2892, lr_0 = 4.1936e-04
Loss = 2.2067e-02, PNorm = 170.4014, GNorm = 0.5045, lr_0 = 4.1907e-04
Loss = 1.7681e-02, PNorm = 170.4227, GNorm = 0.2324, lr_0 = 4.1879e-04
Loss = 1.6654e-02, PNorm = 170.4481, GNorm = 0.8129, lr_0 = 4.1850e-04
Loss = 1.7183e-02, PNorm = 170.4766, GNorm = 0.2506, lr_0 = 4.1821e-04
Loss = 1.7028e-02, PNorm = 170.5029, GNorm = 0.2282, lr_0 = 4.1793e-04
Loss = 1.7010e-02, PNorm = 170.5287, GNorm = 0.2238, lr_0 = 4.1764e-04
Loss = 1.8987e-02, PNorm = 170.5548, GNorm = 0.3027, lr_0 = 4.1736e-04
Loss = 1.5835e-02, PNorm = 170.5766, GNorm = 0.5146, lr_0 = 4.1707e-04
Loss = 1.8002e-02, PNorm = 170.6005, GNorm = 0.5132, lr_0 = 4.1678e-04
Loss = 1.9756e-02, PNorm = 170.6266, GNorm = 0.4540, lr_0 = 4.1650e-04
Loss = 1.7253e-02, PNorm = 170.6468, GNorm = 0.3720, lr_0 = 4.1621e-04
Loss = 1.6036e-02, PNorm = 170.6718, GNorm = 0.3236, lr_0 = 4.1593e-04
Loss = 1.5906e-02, PNorm = 170.6935, GNorm = 0.2185, lr_0 = 4.1564e-04
Loss = 2.4072e-02, PNorm = 170.7180, GNorm = 0.1954, lr_0 = 4.1536e-04
Loss = 1.6920e-02, PNorm = 170.7420, GNorm = 0.2443, lr_0 = 4.1507e-04
Loss = 1.8523e-02, PNorm = 170.7649, GNorm = 0.2758, lr_0 = 4.1479e-04
Loss = 2.0435e-02, PNorm = 170.7929, GNorm = 0.4264, lr_0 = 4.1450e-04
Loss = 2.6795e-02, PNorm = 170.8185, GNorm = 0.8788, lr_0 = 4.1422e-04
Loss = 2.3079e-02, PNorm = 170.8410, GNorm = 0.4455, lr_0 = 4.1394e-04
Loss = 1.5598e-02, PNorm = 170.8669, GNorm = 0.2939, lr_0 = 4.1365e-04
Loss = 1.7648e-02, PNorm = 170.8920, GNorm = 0.4168, lr_0 = 4.1337e-04
Loss = 1.8434e-02, PNorm = 170.9146, GNorm = 0.3216, lr_0 = 4.1309e-04
Loss = 2.5567e-02, PNorm = 170.9403, GNorm = 0.2872, lr_0 = 4.1280e-04
Loss = 1.6376e-02, PNorm = 170.9660, GNorm = 0.3509, lr_0 = 4.1252e-04
Loss = 1.9353e-02, PNorm = 170.9888, GNorm = 0.2649, lr_0 = 4.1224e-04
Loss = 2.3480e-02, PNorm = 171.0121, GNorm = 1.5378, lr_0 = 4.1196e-04
Loss = 1.8275e-02, PNorm = 171.0305, GNorm = 0.3565, lr_0 = 4.1167e-04
Loss = 2.1956e-02, PNorm = 171.0557, GNorm = 0.8000, lr_0 = 4.1139e-04
Loss = 1.7712e-02, PNorm = 171.0820, GNorm = 0.3400, lr_0 = 4.1111e-04
Loss = 1.7873e-02, PNorm = 171.1047, GNorm = 0.2429, lr_0 = 4.1083e-04
Loss = 1.9319e-02, PNorm = 171.1310, GNorm = 0.7080, lr_0 = 4.1055e-04
Loss = 2.7280e-02, PNorm = 171.1583, GNorm = 0.8645, lr_0 = 4.1027e-04
Loss = 2.0736e-02, PNorm = 171.1920, GNorm = 0.6198, lr_0 = 4.0998e-04
Loss = 2.0040e-02, PNorm = 171.2194, GNorm = 0.2195, lr_0 = 4.0970e-04
Loss = 2.2939e-02, PNorm = 171.2430, GNorm = 0.3127, lr_0 = 4.0942e-04
Loss = 2.5050e-02, PNorm = 171.2688, GNorm = 0.6125, lr_0 = 4.0914e-04
Loss = 1.7277e-02, PNorm = 171.2945, GNorm = 0.2575, lr_0 = 4.0886e-04
Loss = 1.8658e-02, PNorm = 171.3177, GNorm = 0.2346, lr_0 = 4.0858e-04
Loss = 2.1585e-02, PNorm = 171.3424, GNorm = 0.7749, lr_0 = 4.0830e-04
Loss = 1.7696e-02, PNorm = 171.3682, GNorm = 0.5657, lr_0 = 4.0802e-04
Loss = 1.8349e-02, PNorm = 171.3913, GNorm = 0.2790, lr_0 = 4.0774e-04
Loss = 1.9296e-02, PNorm = 171.4161, GNorm = 0.3312, lr_0 = 4.0746e-04
Loss = 1.6822e-02, PNorm = 171.4403, GNorm = 0.2246, lr_0 = 4.0718e-04
Loss = 1.6292e-02, PNorm = 171.4637, GNorm = 0.3522, lr_0 = 4.0691e-04
Loss = 2.3917e-02, PNorm = 171.4849, GNorm = 0.1941, lr_0 = 4.0663e-04
Loss = 1.9275e-02, PNorm = 171.5120, GNorm = 0.4363, lr_0 = 4.0635e-04
Loss = 1.7424e-02, PNorm = 171.5336, GNorm = 0.1947, lr_0 = 4.0607e-04
Loss = 2.1795e-02, PNorm = 171.5567, GNorm = 0.4977, lr_0 = 4.0579e-04
Loss = 2.3514e-02, PNorm = 171.5796, GNorm = 0.3294, lr_0 = 4.0551e-04
Loss = 1.8207e-02, PNorm = 171.6061, GNorm = 0.4996, lr_0 = 4.0524e-04
Loss = 2.3444e-02, PNorm = 171.6354, GNorm = 0.4005, lr_0 = 4.0496e-04
Loss = 1.8467e-02, PNorm = 171.6635, GNorm = 0.2449, lr_0 = 4.0468e-04
Validation mae = 0.121814
Epoch 13
Loss = 1.6051e-02, PNorm = 171.6842, GNorm = 0.1504, lr_0 = 4.0440e-04
Loss = 1.3598e-02, PNorm = 171.7011, GNorm = 0.3399, lr_0 = 4.0413e-04
Loss = 1.3592e-02, PNorm = 171.7168, GNorm = 0.2111, lr_0 = 4.0385e-04
Loss = 2.0865e-02, PNorm = 171.7337, GNorm = 0.3847, lr_0 = 4.0357e-04
Loss = 2.3333e-02, PNorm = 171.7521, GNorm = 0.3716, lr_0 = 4.0330e-04
Loss = 1.6732e-02, PNorm = 171.7708, GNorm = 0.6689, lr_0 = 4.0302e-04
Loss = 1.5784e-02, PNorm = 171.7894, GNorm = 0.2666, lr_0 = 4.0274e-04
Loss = 1.4584e-02, PNorm = 171.8056, GNorm = 0.1855, lr_0 = 4.0247e-04
Loss = 1.4811e-02, PNorm = 171.8228, GNorm = 0.4151, lr_0 = 4.0219e-04
Loss = 1.8427e-02, PNorm = 171.8381, GNorm = 0.4605, lr_0 = 4.0192e-04
Loss = 1.7879e-02, PNorm = 171.8535, GNorm = 0.4361, lr_0 = 4.0164e-04
Loss = 1.3785e-02, PNorm = 171.8699, GNorm = 0.1737, lr_0 = 4.0137e-04
Loss = 1.4416e-02, PNorm = 171.8857, GNorm = 0.2784, lr_0 = 4.0109e-04
Loss = 1.5534e-02, PNorm = 171.9033, GNorm = 0.3531, lr_0 = 4.0082e-04
Loss = 1.6802e-02, PNorm = 171.9197, GNorm = 0.5081, lr_0 = 4.0054e-04
Loss = 1.3452e-02, PNorm = 171.9363, GNorm = 0.1797, lr_0 = 4.0027e-04
Loss = 1.4402e-02, PNorm = 171.9509, GNorm = 0.1912, lr_0 = 3.9999e-04
Loss = 3.2680e-02, PNorm = 171.9696, GNorm = 0.2660, lr_0 = 3.9972e-04
Loss = 1.6409e-02, PNorm = 171.9874, GNorm = 0.4905, lr_0 = 3.9945e-04
Loss = 1.5255e-02, PNorm = 172.0058, GNorm = 0.6442, lr_0 = 3.9917e-04
Loss = 1.6357e-02, PNorm = 172.0227, GNorm = 0.5112, lr_0 = 3.9890e-04
Loss = 1.3839e-02, PNorm = 172.0380, GNorm = 0.3071, lr_0 = 3.9863e-04
Loss = 1.7964e-02, PNorm = 172.0497, GNorm = 0.2612, lr_0 = 3.9835e-04
Loss = 1.7284e-02, PNorm = 172.0702, GNorm = 0.2473, lr_0 = 3.9808e-04
Loss = 1.2845e-02, PNorm = 172.0906, GNorm = 0.2193, lr_0 = 3.9781e-04
Loss = 1.5470e-02, PNorm = 172.1044, GNorm = 0.4438, lr_0 = 3.9753e-04
Loss = 1.9268e-02, PNorm = 172.1223, GNorm = 0.3536, lr_0 = 3.9726e-04
Loss = 1.5279e-02, PNorm = 172.1392, GNorm = 0.1384, lr_0 = 3.9699e-04
Loss = 1.4728e-02, PNorm = 172.1574, GNorm = 0.2068, lr_0 = 3.9672e-04
Loss = 1.7280e-02, PNorm = 172.1769, GNorm = 0.3172, lr_0 = 3.9645e-04
Loss = 2.1967e-02, PNorm = 172.1934, GNorm = 1.1778, lr_0 = 3.9617e-04
Loss = 1.4368e-02, PNorm = 172.2088, GNorm = 0.2446, lr_0 = 3.9590e-04
Loss = 1.8107e-02, PNorm = 172.2275, GNorm = 0.2666, lr_0 = 3.9563e-04
Loss = 1.6322e-02, PNorm = 172.2449, GNorm = 0.1860, lr_0 = 3.9536e-04
Loss = 1.3026e-02, PNorm = 172.2625, GNorm = 0.2557, lr_0 = 3.9509e-04
Loss = 1.9935e-02, PNorm = 172.2796, GNorm = 0.2403, lr_0 = 3.9482e-04
Loss = 1.4202e-02, PNorm = 172.2968, GNorm = 0.2222, lr_0 = 3.9455e-04
Loss = 1.1672e-02, PNorm = 172.3147, GNorm = 0.1651, lr_0 = 3.9428e-04
Loss = 1.4826e-02, PNorm = 172.3331, GNorm = 0.2820, lr_0 = 3.9401e-04
Loss = 1.3701e-02, PNorm = 172.3514, GNorm = 0.4148, lr_0 = 3.9374e-04
Loss = 1.7118e-02, PNorm = 172.3666, GNorm = 0.3457, lr_0 = 3.9347e-04
Loss = 1.4459e-02, PNorm = 172.3840, GNorm = 0.3868, lr_0 = 3.9320e-04
Loss = 1.4118e-02, PNorm = 172.4018, GNorm = 0.2972, lr_0 = 3.9293e-04
Loss = 1.4692e-02, PNorm = 172.4185, GNorm = 0.3251, lr_0 = 3.9266e-04
Loss = 1.4732e-02, PNorm = 172.4377, GNorm = 0.3558, lr_0 = 3.9239e-04
Loss = 1.8010e-02, PNorm = 172.4557, GNorm = 0.1170, lr_0 = 3.9212e-04
Loss = 1.2856e-02, PNorm = 172.4720, GNorm = 0.3410, lr_0 = 3.9185e-04
Loss = 1.5301e-02, PNorm = 172.4918, GNorm = 0.2229, lr_0 = 3.9159e-04
Loss = 1.2212e-02, PNorm = 172.5080, GNorm = 0.2044, lr_0 = 3.9132e-04
Loss = 1.6156e-02, PNorm = 172.5217, GNorm = 0.1696, lr_0 = 3.9105e-04
Loss = 1.8581e-02, PNorm = 172.5386, GNorm = 0.7381, lr_0 = 3.9078e-04
Loss = 2.3348e-02, PNorm = 172.5561, GNorm = 0.3201, lr_0 = 3.9051e-04
Loss = 1.8222e-02, PNorm = 172.5732, GNorm = 0.1611, lr_0 = 3.9025e-04
Loss = 1.8269e-02, PNorm = 172.5934, GNorm = 0.1796, lr_0 = 3.8998e-04
Loss = 2.1236e-02, PNorm = 172.6132, GNorm = 0.3559, lr_0 = 3.8971e-04
Loss = 1.6572e-02, PNorm = 172.6323, GNorm = 0.1809, lr_0 = 3.8945e-04
Loss = 2.2051e-02, PNorm = 172.6520, GNorm = 0.2896, lr_0 = 3.8918e-04
Loss = 1.7371e-02, PNorm = 172.6726, GNorm = 0.7085, lr_0 = 3.8891e-04
Loss = 1.2824e-02, PNorm = 172.6915, GNorm = 0.1946, lr_0 = 3.8865e-04
Loss = 1.3402e-02, PNorm = 172.7122, GNorm = 0.1687, lr_0 = 3.8838e-04
Loss = 1.5836e-02, PNorm = 172.7294, GNorm = 0.3092, lr_0 = 3.8811e-04
Loss = 1.8332e-02, PNorm = 172.7499, GNorm = 0.2094, lr_0 = 3.8785e-04
Loss = 1.1068e-02, PNorm = 172.7712, GNorm = 0.1635, lr_0 = 3.8758e-04
Loss = 1.9418e-02, PNorm = 172.7910, GNorm = 0.3257, lr_0 = 3.8732e-04
Loss = 1.8413e-02, PNorm = 172.8122, GNorm = 0.4968, lr_0 = 3.8705e-04
Loss = 1.4253e-02, PNorm = 172.8285, GNorm = 0.1771, lr_0 = 3.8679e-04
Loss = 1.5084e-02, PNorm = 172.8450, GNorm = 0.2262, lr_0 = 3.8652e-04
Loss = 1.4839e-02, PNorm = 172.8600, GNorm = 0.3744, lr_0 = 3.8626e-04
Loss = 1.7092e-02, PNorm = 172.8753, GNorm = 0.3795, lr_0 = 3.8599e-04
Loss = 2.2551e-02, PNorm = 172.8910, GNorm = 0.1205, lr_0 = 3.8573e-04
Loss = 1.8797e-02, PNorm = 172.9099, GNorm = 0.1460, lr_0 = 3.8546e-04
Loss = 1.2601e-02, PNorm = 172.9280, GNorm = 0.2715, lr_0 = 3.8520e-04
Loss = 1.5762e-02, PNorm = 172.9488, GNorm = 0.1722, lr_0 = 3.8493e-04
Loss = 2.0465e-02, PNorm = 172.9656, GNorm = 0.3797, lr_0 = 3.8467e-04
Loss = 1.6801e-02, PNorm = 172.9886, GNorm = 0.7498, lr_0 = 3.8441e-04
Loss = 1.6938e-02, PNorm = 173.0119, GNorm = 0.3787, lr_0 = 3.8414e-04
Loss = 1.6944e-02, PNorm = 173.0374, GNorm = 0.2725, lr_0 = 3.8388e-04
Loss = 1.3683e-02, PNorm = 173.0592, GNorm = 0.3099, lr_0 = 3.8362e-04
Loss = 1.7495e-02, PNorm = 173.0803, GNorm = 0.3649, lr_0 = 3.8336e-04
Loss = 1.6278e-02, PNorm = 173.1032, GNorm = 0.1785, lr_0 = 3.8309e-04
Loss = 1.3904e-02, PNorm = 173.1254, GNorm = 0.4575, lr_0 = 3.8283e-04
Loss = 2.5987e-02, PNorm = 173.1431, GNorm = 0.2656, lr_0 = 3.8257e-04
Loss = 2.0173e-02, PNorm = 173.1611, GNorm = 0.3284, lr_0 = 3.8231e-04
Loss = 1.7268e-02, PNorm = 173.1819, GNorm = 0.2340, lr_0 = 3.8204e-04
Loss = 1.3803e-02, PNorm = 173.2021, GNorm = 0.4017, lr_0 = 3.8178e-04
Loss = 1.4634e-02, PNorm = 173.2234, GNorm = 0.2353, lr_0 = 3.8152e-04
Loss = 1.3738e-02, PNorm = 173.2442, GNorm = 0.1796, lr_0 = 3.8126e-04
Loss = 1.8046e-02, PNorm = 173.2633, GNorm = 0.3346, lr_0 = 3.8100e-04
Loss = 2.1153e-02, PNorm = 173.2814, GNorm = 0.2196, lr_0 = 3.8074e-04
Loss = 2.3992e-02, PNorm = 173.2978, GNorm = 0.8291, lr_0 = 3.8048e-04
Loss = 1.9723e-02, PNorm = 173.3166, GNorm = 0.4191, lr_0 = 3.8022e-04
Loss = 1.3487e-02, PNorm = 173.3403, GNorm = 0.4098, lr_0 = 3.7995e-04
Loss = 1.2986e-02, PNorm = 173.3605, GNorm = 0.3350, lr_0 = 3.7969e-04
Loss = 1.4370e-02, PNorm = 173.3788, GNorm = 0.2605, lr_0 = 3.7943e-04
Loss = 1.2903e-02, PNorm = 173.4012, GNorm = 0.3133, lr_0 = 3.7917e-04
Loss = 1.8393e-02, PNorm = 173.4179, GNorm = 0.6240, lr_0 = 3.7891e-04
Loss = 1.4814e-02, PNorm = 173.4393, GNorm = 0.5276, lr_0 = 3.7866e-04
Loss = 1.9614e-02, PNorm = 173.4585, GNorm = 0.3615, lr_0 = 3.7840e-04
Loss = 1.3587e-02, PNorm = 173.4804, GNorm = 0.4592, lr_0 = 3.7814e-04
Loss = 1.3439e-02, PNorm = 173.5001, GNorm = 0.1782, lr_0 = 3.7788e-04
Loss = 1.6392e-02, PNorm = 173.5193, GNorm = 0.3396, lr_0 = 3.7762e-04
Loss = 1.6967e-02, PNorm = 173.5358, GNorm = 0.2749, lr_0 = 3.7736e-04
Loss = 1.8332e-02, PNorm = 173.5547, GNorm = 0.1924, lr_0 = 3.7710e-04
Loss = 1.7946e-02, PNorm = 173.5734, GNorm = 0.1802, lr_0 = 3.7684e-04
Loss = 2.1976e-02, PNorm = 173.5961, GNorm = 0.6303, lr_0 = 3.7659e-04
Loss = 1.2856e-02, PNorm = 173.6149, GNorm = 0.2793, lr_0 = 3.7633e-04
Loss = 2.0876e-02, PNorm = 173.6319, GNorm = 0.3007, lr_0 = 3.7607e-04
Loss = 2.2795e-02, PNorm = 173.6504, GNorm = 0.2491, lr_0 = 3.7581e-04
Loss = 1.8549e-02, PNorm = 173.6714, GNorm = 0.1977, lr_0 = 3.7555e-04
Loss = 1.2449e-02, PNorm = 173.6910, GNorm = 0.3017, lr_0 = 3.7530e-04
Loss = 1.6735e-02, PNorm = 173.7086, GNorm = 0.2161, lr_0 = 3.7504e-04
Loss = 1.6488e-02, PNorm = 173.7228, GNorm = 0.2275, lr_0 = 3.7478e-04
Loss = 1.5561e-02, PNorm = 173.7371, GNorm = 0.5069, lr_0 = 3.7453e-04
Loss = 1.4119e-02, PNorm = 173.7582, GNorm = 0.4503, lr_0 = 3.7427e-04
Loss = 1.7286e-02, PNorm = 173.7792, GNorm = 0.6834, lr_0 = 3.7401e-04
Loss = 2.0690e-02, PNorm = 173.7992, GNorm = 0.2646, lr_0 = 3.7376e-04
Loss = 1.6534e-02, PNorm = 173.8177, GNorm = 0.2161, lr_0 = 3.7350e-04
Loss = 1.3181e-02, PNorm = 173.8386, GNorm = 0.1810, lr_0 = 3.7325e-04
Loss = 1.5912e-02, PNorm = 173.8572, GNorm = 0.3062, lr_0 = 3.7299e-04
Loss = 1.8891e-02, PNorm = 173.8779, GNorm = 0.1619, lr_0 = 3.7273e-04
Validation mae = 0.121493
Epoch 14
Loss = 1.5562e-02, PNorm = 173.8933, GNorm = 0.1794, lr_0 = 3.7248e-04
Loss = 1.4127e-02, PNorm = 173.9076, GNorm = 0.2740, lr_0 = 3.7222e-04
Loss = 1.4907e-02, PNorm = 173.9180, GNorm = 0.2328, lr_0 = 3.7197e-04
Loss = 1.2170e-02, PNorm = 173.9320, GNorm = 0.2174, lr_0 = 3.7171e-04
Loss = 1.6532e-02, PNorm = 173.9471, GNorm = 0.2497, lr_0 = 3.7146e-04
Loss = 1.2897e-02, PNorm = 173.9613, GNorm = 0.2307, lr_0 = 3.7120e-04
Loss = 1.1539e-02, PNorm = 173.9759, GNorm = 0.1605, lr_0 = 3.7095e-04
Loss = 1.1834e-02, PNorm = 173.9912, GNorm = 0.1930, lr_0 = 3.7070e-04
Loss = 1.5009e-02, PNorm = 174.0076, GNorm = 0.1760, lr_0 = 3.7044e-04
Loss = 1.2725e-02, PNorm = 174.0227, GNorm = 0.3829, lr_0 = 3.7019e-04
Loss = 1.3956e-02, PNorm = 174.0357, GNorm = 0.2832, lr_0 = 3.6993e-04
Loss = 1.3757e-02, PNorm = 174.0507, GNorm = 0.8522, lr_0 = 3.6968e-04
Loss = 1.1168e-02, PNorm = 174.0624, GNorm = 0.2487, lr_0 = 3.6943e-04
Loss = 1.1925e-02, PNorm = 174.0775, GNorm = 0.1476, lr_0 = 3.6917e-04
Loss = 1.1930e-02, PNorm = 174.0924, GNorm = 0.3644, lr_0 = 3.6892e-04
Loss = 1.3652e-02, PNorm = 174.1105, GNorm = 0.1773, lr_0 = 3.6867e-04
Loss = 2.0880e-02, PNorm = 174.1286, GNorm = 0.2764, lr_0 = 3.6842e-04
Loss = 1.2978e-02, PNorm = 174.1390, GNorm = 0.2718, lr_0 = 3.6816e-04
Loss = 1.0845e-02, PNorm = 174.1524, GNorm = 0.3713, lr_0 = 3.6791e-04
Loss = 1.0375e-02, PNorm = 174.1621, GNorm = 0.2094, lr_0 = 3.6766e-04
Loss = 1.3650e-02, PNorm = 174.1754, GNorm = 0.3425, lr_0 = 3.6741e-04
Loss = 1.3801e-02, PNorm = 174.1916, GNorm = 0.2088, lr_0 = 3.6716e-04
Loss = 1.5752e-02, PNorm = 174.2069, GNorm = 0.2911, lr_0 = 3.6690e-04
Loss = 1.1742e-02, PNorm = 174.2226, GNorm = 0.1408, lr_0 = 3.6665e-04
Loss = 1.4075e-02, PNorm = 174.2352, GNorm = 0.4140, lr_0 = 3.6640e-04
Loss = 1.7033e-02, PNorm = 174.2487, GNorm = 0.1515, lr_0 = 3.6615e-04
Loss = 1.1978e-02, PNorm = 174.2593, GNorm = 0.2189, lr_0 = 3.6590e-04
Loss = 1.0380e-02, PNorm = 174.2713, GNorm = 0.1660, lr_0 = 3.6565e-04
Loss = 1.3660e-02, PNorm = 174.2844, GNorm = 0.2302, lr_0 = 3.6540e-04
Loss = 1.5013e-02, PNorm = 174.2976, GNorm = 0.2437, lr_0 = 3.6515e-04
Loss = 1.3227e-02, PNorm = 174.3119, GNorm = 0.1454, lr_0 = 3.6490e-04
Loss = 1.3961e-02, PNorm = 174.3253, GNorm = 0.2137, lr_0 = 3.6465e-04
Loss = 1.1794e-02, PNorm = 174.3401, GNorm = 0.1582, lr_0 = 3.6440e-04
Loss = 1.4536e-02, PNorm = 174.3527, GNorm = 0.2845, lr_0 = 3.6415e-04
Loss = 1.6746e-02, PNorm = 174.3666, GNorm = 0.3174, lr_0 = 3.6390e-04
Loss = 1.2098e-02, PNorm = 174.3817, GNorm = 0.2090, lr_0 = 3.6365e-04
Loss = 1.3941e-02, PNorm = 174.3963, GNorm = 0.1721, lr_0 = 3.6340e-04
Loss = 1.6405e-02, PNorm = 174.4097, GNorm = 0.2639, lr_0 = 3.6315e-04
Loss = 1.2601e-02, PNorm = 174.4227, GNorm = 0.2413, lr_0 = 3.6290e-04
Loss = 2.1220e-02, PNorm = 174.4407, GNorm = 0.3184, lr_0 = 3.6266e-04
Loss = 1.4155e-02, PNorm = 174.4571, GNorm = 0.5105, lr_0 = 3.6241e-04
Loss = 1.1572e-02, PNorm = 174.4735, GNorm = 0.1969, lr_0 = 3.6216e-04
Loss = 1.2122e-02, PNorm = 174.4862, GNorm = 0.3258, lr_0 = 3.6191e-04
Loss = 1.3502e-02, PNorm = 174.4962, GNorm = 0.3327, lr_0 = 3.6166e-04
Loss = 1.2554e-02, PNorm = 174.5076, GNorm = 0.2561, lr_0 = 3.6141e-04
Loss = 1.3668e-02, PNorm = 174.5219, GNorm = 0.1800, lr_0 = 3.6117e-04
Loss = 3.3689e-02, PNorm = 174.5404, GNorm = 1.0069, lr_0 = 3.6092e-04
Loss = 1.3878e-02, PNorm = 174.5595, GNorm = 0.2180, lr_0 = 3.6067e-04
Loss = 1.4167e-02, PNorm = 174.5790, GNorm = 0.2098, lr_0 = 3.6043e-04
Loss = 1.3926e-02, PNorm = 174.5963, GNorm = 0.3998, lr_0 = 3.6018e-04
Loss = 1.2741e-02, PNorm = 174.6073, GNorm = 0.6128, lr_0 = 3.5993e-04
Loss = 1.0719e-02, PNorm = 174.6194, GNorm = 0.2338, lr_0 = 3.5969e-04
Loss = 1.1943e-02, PNorm = 174.6338, GNorm = 0.2091, lr_0 = 3.5944e-04
Loss = 1.6677e-02, PNorm = 174.6471, GNorm = 0.2361, lr_0 = 3.5919e-04
Loss = 1.5668e-02, PNorm = 174.6578, GNorm = 0.2973, lr_0 = 3.5895e-04
Loss = 1.0516e-02, PNorm = 174.6713, GNorm = 0.2588, lr_0 = 3.5870e-04
Loss = 1.7411e-02, PNorm = 174.6869, GNorm = 0.2926, lr_0 = 3.5845e-04
Loss = 1.6654e-02, PNorm = 174.7026, GNorm = 0.1942, lr_0 = 3.5821e-04
Loss = 1.3100e-02, PNorm = 174.7213, GNorm = 0.2984, lr_0 = 3.5796e-04
Loss = 1.5476e-02, PNorm = 174.7386, GNorm = 0.4014, lr_0 = 3.5772e-04
Loss = 2.0589e-02, PNorm = 174.7535, GNorm = 0.3643, lr_0 = 3.5747e-04
Loss = 2.2930e-02, PNorm = 174.7681, GNorm = 0.4080, lr_0 = 3.5723e-04
Loss = 1.7640e-02, PNorm = 174.7867, GNorm = 0.2711, lr_0 = 3.5698e-04
Loss = 1.1826e-02, PNorm = 174.8094, GNorm = 0.2265, lr_0 = 3.5674e-04
Loss = 1.5239e-02, PNorm = 174.8271, GNorm = 0.2329, lr_0 = 3.5650e-04
Loss = 1.0570e-02, PNorm = 174.8437, GNorm = 0.2237, lr_0 = 3.5625e-04
Loss = 1.4013e-02, PNorm = 174.8591, GNorm = 0.1398, lr_0 = 3.5601e-04
Loss = 1.5012e-02, PNorm = 174.8780, GNorm = 0.1893, lr_0 = 3.5576e-04
Loss = 1.0941e-02, PNorm = 174.8920, GNorm = 0.2151, lr_0 = 3.5552e-04
Loss = 1.0775e-02, PNorm = 174.9078, GNorm = 0.2424, lr_0 = 3.5528e-04
Loss = 1.9953e-02, PNorm = 174.9255, GNorm = 0.3727, lr_0 = 3.5503e-04
Loss = 1.1646e-02, PNorm = 174.9419, GNorm = 0.2767, lr_0 = 3.5479e-04
Loss = 1.2206e-02, PNorm = 174.9606, GNorm = 0.1666, lr_0 = 3.5455e-04
Loss = 1.3091e-02, PNorm = 174.9789, GNorm = 0.1954, lr_0 = 3.5430e-04
Loss = 1.8875e-02, PNorm = 174.9977, GNorm = 0.3189, lr_0 = 3.5406e-04
Loss = 1.8402e-02, PNorm = 175.0125, GNorm = 0.3808, lr_0 = 3.5382e-04
Loss = 1.1794e-02, PNorm = 175.0279, GNorm = 0.2293, lr_0 = 3.5358e-04
Loss = 1.6267e-02, PNorm = 175.0450, GNorm = 0.2960, lr_0 = 3.5333e-04
Loss = 1.1685e-02, PNorm = 175.0631, GNorm = 0.2320, lr_0 = 3.5309e-04
Loss = 1.1568e-02, PNorm = 175.0789, GNorm = 0.2488, lr_0 = 3.5285e-04
Loss = 1.1956e-02, PNorm = 175.0978, GNorm = 0.3483, lr_0 = 3.5261e-04
Loss = 1.2579e-02, PNorm = 175.1165, GNorm = 0.2571, lr_0 = 3.5237e-04
Loss = 1.1163e-02, PNorm = 175.1355, GNorm = 0.1461, lr_0 = 3.5212e-04
Loss = 1.0672e-02, PNorm = 175.1550, GNorm = 0.1464, lr_0 = 3.5188e-04
Loss = 1.8377e-02, PNorm = 175.1715, GNorm = 0.3735, lr_0 = 3.5164e-04
Loss = 1.2044e-02, PNorm = 175.1848, GNorm = 0.1539, lr_0 = 3.5140e-04
Loss = 1.8841e-02, PNorm = 175.2017, GNorm = 0.3785, lr_0 = 3.5116e-04
Loss = 1.9429e-02, PNorm = 175.2158, GNorm = 0.3702, lr_0 = 3.5092e-04
Loss = 1.6253e-02, PNorm = 175.2329, GNorm = 0.1947, lr_0 = 3.5068e-04
Loss = 1.2518e-02, PNorm = 175.2509, GNorm = 0.3402, lr_0 = 3.5044e-04
Loss = 1.3123e-02, PNorm = 175.2657, GNorm = 0.2099, lr_0 = 3.5020e-04
Loss = 1.3741e-02, PNorm = 175.2808, GNorm = 0.2066, lr_0 = 3.4996e-04
Loss = 1.2433e-02, PNorm = 175.2968, GNorm = 0.1505, lr_0 = 3.4972e-04
Loss = 1.4261e-02, PNorm = 175.3146, GNorm = 0.1778, lr_0 = 3.4948e-04
Loss = 1.0677e-02, PNorm = 175.3326, GNorm = 0.1923, lr_0 = 3.4924e-04
Loss = 1.8436e-02, PNorm = 175.3510, GNorm = 0.2632, lr_0 = 3.4900e-04
Loss = 1.3060e-02, PNorm = 175.3713, GNorm = 0.1987, lr_0 = 3.4876e-04
Loss = 1.3138e-02, PNorm = 175.3917, GNorm = 0.3915, lr_0 = 3.4852e-04
Loss = 1.1588e-02, PNorm = 175.4087, GNorm = 0.3585, lr_0 = 3.4828e-04
Loss = 2.1446e-02, PNorm = 175.4277, GNorm = 0.1881, lr_0 = 3.4805e-04
Loss = 1.0540e-02, PNorm = 175.4461, GNorm = 0.1545, lr_0 = 3.4781e-04
Loss = 1.1300e-02, PNorm = 175.4595, GNorm = 0.2031, lr_0 = 3.4757e-04
Loss = 1.5451e-02, PNorm = 175.4762, GNorm = 0.2745, lr_0 = 3.4733e-04
Loss = 2.2074e-02, PNorm = 175.4927, GNorm = 0.2782, lr_0 = 3.4709e-04
Loss = 1.4118e-02, PNorm = 175.5080, GNorm = 0.4025, lr_0 = 3.4686e-04
Loss = 1.9119e-02, PNorm = 175.5236, GNorm = 0.2654, lr_0 = 3.4662e-04
Loss = 1.9440e-02, PNorm = 175.5384, GNorm = 0.2841, lr_0 = 3.4638e-04
Loss = 1.2617e-02, PNorm = 175.5567, GNorm = 0.2706, lr_0 = 3.4614e-04
Loss = 1.0876e-02, PNorm = 175.5741, GNorm = 0.3527, lr_0 = 3.4591e-04
Loss = 1.2109e-02, PNorm = 175.5927, GNorm = 0.2195, lr_0 = 3.4567e-04
Loss = 1.2999e-02, PNorm = 175.6085, GNorm = 0.2465, lr_0 = 3.4543e-04
Loss = 1.7047e-02, PNorm = 175.6241, GNorm = 0.1713, lr_0 = 3.4520e-04
Loss = 1.1756e-02, PNorm = 175.6398, GNorm = 0.1866, lr_0 = 3.4496e-04
Loss = 1.2148e-02, PNorm = 175.6572, GNorm = 0.2044, lr_0 = 3.4472e-04
Loss = 1.3838e-02, PNorm = 175.6714, GNorm = 0.2045, lr_0 = 3.4449e-04
Loss = 1.2894e-02, PNorm = 175.6879, GNorm = 0.1926, lr_0 = 3.4425e-04
Loss = 1.1784e-02, PNorm = 175.7045, GNorm = 0.2091, lr_0 = 3.4402e-04
Loss = 1.3059e-02, PNorm = 175.7251, GNorm = 0.2403, lr_0 = 3.4378e-04
Loss = 1.4673e-02, PNorm = 175.7440, GNorm = 0.1753, lr_0 = 3.4354e-04
Loss = 1.2919e-02, PNorm = 175.7630, GNorm = 0.4004, lr_0 = 3.4331e-04
Validation mae = 0.121496
Epoch 15
Loss = 1.5200e-02, PNorm = 175.7770, GNorm = 0.3063, lr_0 = 3.4307e-04
Loss = 1.2984e-02, PNorm = 175.7861, GNorm = 0.1402, lr_0 = 3.4284e-04
Loss = 1.7288e-02, PNorm = 175.7964, GNorm = 0.4027, lr_0 = 3.4260e-04
Loss = 1.1237e-02, PNorm = 175.8099, GNorm = 0.2285, lr_0 = 3.4237e-04
Loss = 1.2413e-02, PNorm = 175.8227, GNorm = 0.2694, lr_0 = 3.4213e-04
Loss = 1.1225e-02, PNorm = 175.8380, GNorm = 0.2343, lr_0 = 3.4190e-04
Loss = 1.1132e-02, PNorm = 175.8541, GNorm = 0.4236, lr_0 = 3.4167e-04
Loss = 1.1562e-02, PNorm = 175.8698, GNorm = 0.2265, lr_0 = 3.4143e-04
Loss = 1.2678e-02, PNorm = 175.8825, GNorm = 0.6795, lr_0 = 3.4120e-04
Loss = 1.1696e-02, PNorm = 175.8920, GNorm = 0.2566, lr_0 = 3.4096e-04
Loss = 1.2819e-02, PNorm = 175.9042, GNorm = 0.1711, lr_0 = 3.4073e-04
Loss = 1.6470e-02, PNorm = 175.9165, GNorm = 0.2904, lr_0 = 3.4050e-04
Loss = 1.7686e-02, PNorm = 175.9291, GNorm = 0.1718, lr_0 = 3.4026e-04
Loss = 1.2998e-02, PNorm = 175.9418, GNorm = 0.2692, lr_0 = 3.4003e-04
Loss = 1.0506e-02, PNorm = 175.9517, GNorm = 0.1342, lr_0 = 3.3980e-04
Loss = 1.1818e-02, PNorm = 175.9623, GNorm = 0.1945, lr_0 = 3.3956e-04
Loss = 9.4525e-03, PNorm = 175.9759, GNorm = 0.2412, lr_0 = 3.3933e-04
Loss = 1.0465e-02, PNorm = 175.9899, GNorm = 0.2171, lr_0 = 3.3910e-04
Loss = 9.5165e-03, PNorm = 176.0012, GNorm = 0.3182, lr_0 = 3.3887e-04
Loss = 2.5032e-02, PNorm = 176.0149, GNorm = 0.2653, lr_0 = 3.3864e-04
Loss = 9.8166e-03, PNorm = 176.0265, GNorm = 0.1549, lr_0 = 3.3840e-04
Loss = 1.3100e-02, PNorm = 176.0370, GNorm = 0.3052, lr_0 = 3.3817e-04
Loss = 1.8632e-02, PNorm = 176.0547, GNorm = 0.3449, lr_0 = 3.3794e-04
Loss = 1.1815e-02, PNorm = 176.0686, GNorm = 0.4316, lr_0 = 3.3771e-04
Loss = 9.9838e-03, PNorm = 176.0793, GNorm = 0.2288, lr_0 = 3.3748e-04
Loss = 1.5352e-02, PNorm = 176.0938, GNorm = 0.2601, lr_0 = 3.3725e-04
Loss = 1.6335e-02, PNorm = 176.1081, GNorm = 0.1431, lr_0 = 3.3701e-04
Loss = 1.3364e-02, PNorm = 176.1217, GNorm = 0.2364, lr_0 = 3.3678e-04
Loss = 1.0927e-02, PNorm = 176.1361, GNorm = 0.3637, lr_0 = 3.3655e-04
Loss = 1.6608e-02, PNorm = 176.1475, GNorm = 0.1776, lr_0 = 3.3632e-04
Loss = 1.4233e-02, PNorm = 176.1597, GNorm = 0.2498, lr_0 = 3.3609e-04
Loss = 1.0151e-02, PNorm = 176.1714, GNorm = 0.2121, lr_0 = 3.3586e-04
Loss = 1.3330e-02, PNorm = 176.1865, GNorm = 0.2486, lr_0 = 3.3563e-04
Loss = 1.1069e-02, PNorm = 176.2008, GNorm = 0.1860, lr_0 = 3.3540e-04
Loss = 1.1515e-02, PNorm = 176.2114, GNorm = 0.2305, lr_0 = 3.3517e-04
Loss = 1.0725e-02, PNorm = 176.2254, GNorm = 0.2593, lr_0 = 3.3494e-04
Loss = 8.2784e-03, PNorm = 176.2351, GNorm = 0.1196, lr_0 = 3.3471e-04
Loss = 9.8794e-03, PNorm = 176.2456, GNorm = 0.3175, lr_0 = 3.3448e-04
Loss = 1.1902e-02, PNorm = 176.2591, GNorm = 0.2371, lr_0 = 3.3425e-04
Loss = 1.0337e-02, PNorm = 176.2706, GNorm = 0.2024, lr_0 = 3.3403e-04
Loss = 1.0780e-02, PNorm = 176.2798, GNorm = 0.3128, lr_0 = 3.3380e-04
Loss = 1.0685e-02, PNorm = 176.2888, GNorm = 0.2408, lr_0 = 3.3357e-04
Loss = 1.3601e-02, PNorm = 176.2978, GNorm = 0.2398, lr_0 = 3.3334e-04
Loss = 1.2061e-02, PNorm = 176.3073, GNorm = 0.2579, lr_0 = 3.3311e-04
Loss = 9.3729e-03, PNorm = 176.3164, GNorm = 0.1788, lr_0 = 3.3288e-04
Loss = 8.7587e-03, PNorm = 176.3285, GNorm = 0.2849, lr_0 = 3.3265e-04
Loss = 9.8568e-03, PNorm = 176.3397, GNorm = 0.1686, lr_0 = 3.3243e-04
Loss = 8.4077e-03, PNorm = 176.3472, GNorm = 0.2864, lr_0 = 3.3220e-04
Loss = 1.2720e-02, PNorm = 176.3567, GNorm = 0.1326, lr_0 = 3.3197e-04
Loss = 1.4748e-02, PNorm = 176.3691, GNorm = 0.2590, lr_0 = 3.3174e-04
Loss = 1.5301e-02, PNorm = 176.3833, GNorm = 0.3388, lr_0 = 3.3152e-04
Loss = 9.8843e-03, PNorm = 176.3959, GNorm = 0.1706, lr_0 = 3.3129e-04
Loss = 1.5377e-02, PNorm = 176.4108, GNorm = 0.4112, lr_0 = 3.3106e-04
Loss = 1.1087e-02, PNorm = 176.4258, GNorm = 0.2734, lr_0 = 3.3084e-04
Loss = 1.0996e-02, PNorm = 176.4397, GNorm = 0.2571, lr_0 = 3.3061e-04
Loss = 1.2893e-02, PNorm = 176.4526, GNorm = 0.1540, lr_0 = 3.3038e-04
Loss = 1.3864e-02, PNorm = 176.4629, GNorm = 0.3641, lr_0 = 3.3016e-04
Loss = 1.0163e-02, PNorm = 176.4785, GNorm = 0.1717, lr_0 = 3.2993e-04
Loss = 1.4531e-02, PNorm = 176.4915, GNorm = 0.4166, lr_0 = 3.2970e-04
Loss = 1.4035e-02, PNorm = 176.5043, GNorm = 0.2523, lr_0 = 3.2948e-04
Loss = 1.2452e-02, PNorm = 176.5176, GNorm = 0.5127, lr_0 = 3.2925e-04
Loss = 1.0247e-02, PNorm = 176.5344, GNorm = 0.2213, lr_0 = 3.2903e-04
Loss = 1.1357e-02, PNorm = 176.5498, GNorm = 0.2586, lr_0 = 3.2880e-04
Loss = 1.0476e-02, PNorm = 176.5645, GNorm = 0.1372, lr_0 = 3.2858e-04
Loss = 1.7545e-02, PNorm = 176.5809, GNorm = 0.2120, lr_0 = 3.2835e-04
Loss = 1.3642e-02, PNorm = 176.5970, GNorm = 0.2301, lr_0 = 3.2813e-04
Loss = 1.0351e-02, PNorm = 176.6117, GNorm = 0.2785, lr_0 = 3.2790e-04
Loss = 9.7547e-03, PNorm = 176.6270, GNorm = 0.4305, lr_0 = 3.2768e-04
Loss = 7.6109e-03, PNorm = 176.6374, GNorm = 0.2029, lr_0 = 3.2745e-04
Loss = 1.3411e-02, PNorm = 176.6495, GNorm = 0.3408, lr_0 = 3.2723e-04
Loss = 1.4672e-02, PNorm = 176.6634, GNorm = 0.5751, lr_0 = 3.2700e-04
Loss = 1.2055e-02, PNorm = 176.6795, GNorm = 0.3022, lr_0 = 3.2678e-04
Loss = 8.7285e-03, PNorm = 176.6968, GNorm = 0.1558, lr_0 = 3.2656e-04
Loss = 1.1531e-02, PNorm = 176.7091, GNorm = 0.3135, lr_0 = 3.2633e-04
Loss = 1.0344e-02, PNorm = 176.7223, GNorm = 0.1723, lr_0 = 3.2611e-04
Loss = 1.3813e-02, PNorm = 176.7341, GNorm = 0.1415, lr_0 = 3.2589e-04
Loss = 1.0315e-02, PNorm = 176.7430, GNorm = 0.2511, lr_0 = 3.2566e-04
Loss = 1.2283e-02, PNorm = 176.7534, GNorm = 0.1376, lr_0 = 3.2544e-04
Loss = 1.1772e-02, PNorm = 176.7650, GNorm = 0.3691, lr_0 = 3.2522e-04
Loss = 1.1402e-02, PNorm = 176.7778, GNorm = 0.2599, lr_0 = 3.2499e-04
Loss = 1.1727e-02, PNorm = 176.7918, GNorm = 0.1888, lr_0 = 3.2477e-04
Loss = 1.1470e-02, PNorm = 176.8073, GNorm = 0.1318, lr_0 = 3.2455e-04
Loss = 1.0056e-02, PNorm = 176.8213, GNorm = 0.2332, lr_0 = 3.2433e-04
Loss = 9.0046e-03, PNorm = 176.8366, GNorm = 0.1302, lr_0 = 3.2410e-04
Loss = 8.8468e-03, PNorm = 176.8516, GNorm = 0.2240, lr_0 = 3.2388e-04
Loss = 1.2524e-02, PNorm = 176.8640, GNorm = 0.2871, lr_0 = 3.2366e-04
Loss = 9.9411e-03, PNorm = 176.8750, GNorm = 0.1801, lr_0 = 3.2344e-04
Loss = 1.3165e-02, PNorm = 176.8905, GNorm = 0.1579, lr_0 = 3.2322e-04
Loss = 9.8169e-03, PNorm = 176.9049, GNorm = 0.2051, lr_0 = 3.2300e-04
Loss = 1.0809e-02, PNorm = 176.9180, GNorm = 0.5247, lr_0 = 3.2277e-04
Loss = 1.2656e-02, PNorm = 176.9295, GNorm = 0.1311, lr_0 = 3.2255e-04
Loss = 1.0164e-02, PNorm = 176.9426, GNorm = 0.1586, lr_0 = 3.2233e-04
Loss = 1.1354e-02, PNorm = 176.9550, GNorm = 0.2266, lr_0 = 3.2211e-04
Loss = 9.2839e-03, PNorm = 176.9691, GNorm = 0.3489, lr_0 = 3.2189e-04
Loss = 1.3242e-02, PNorm = 176.9798, GNorm = 0.1631, lr_0 = 3.2167e-04
Loss = 1.4111e-02, PNorm = 176.9938, GNorm = 0.5750, lr_0 = 3.2145e-04
Loss = 1.0717e-02, PNorm = 177.0067, GNorm = 0.1975, lr_0 = 3.2123e-04
Loss = 1.2932e-02, PNorm = 177.0231, GNorm = 0.4634, lr_0 = 3.2101e-04
Loss = 1.0381e-02, PNorm = 177.0389, GNorm = 0.1966, lr_0 = 3.2079e-04
Loss = 1.2458e-02, PNorm = 177.0533, GNorm = 0.2301, lr_0 = 3.2057e-04
Loss = 1.5189e-02, PNorm = 177.0695, GNorm = 0.3866, lr_0 = 3.2035e-04
Loss = 1.1333e-02, PNorm = 177.0827, GNorm = 0.2223, lr_0 = 3.2013e-04
Loss = 1.9181e-02, PNorm = 177.0940, GNorm = 0.4310, lr_0 = 3.1991e-04
Loss = 1.6378e-02, PNorm = 177.1094, GNorm = 0.2520, lr_0 = 3.1969e-04
Loss = 1.4677e-02, PNorm = 177.1232, GNorm = 0.2554, lr_0 = 3.1947e-04
Loss = 9.5380e-03, PNorm = 177.1372, GNorm = 0.2605, lr_0 = 3.1925e-04
Loss = 1.4882e-02, PNorm = 177.1499, GNorm = 0.1244, lr_0 = 3.1904e-04
Loss = 1.1005e-02, PNorm = 177.1658, GNorm = 0.2694, lr_0 = 3.1882e-04
Loss = 1.1371e-02, PNorm = 177.1803, GNorm = 0.3078, lr_0 = 3.1860e-04
Loss = 1.2481e-02, PNorm = 177.1966, GNorm = 0.1786, lr_0 = 3.1838e-04
Loss = 1.0222e-02, PNorm = 177.2131, GNorm = 0.2766, lr_0 = 3.1816e-04
Loss = 9.9113e-03, PNorm = 177.2295, GNorm = 0.2113, lr_0 = 3.1794e-04
Loss = 1.8970e-02, PNorm = 177.2467, GNorm = 0.3917, lr_0 = 3.1773e-04
Loss = 1.2865e-02, PNorm = 177.2616, GNorm = 0.1909, lr_0 = 3.1751e-04
Loss = 9.8373e-03, PNorm = 177.2744, GNorm = 0.2716, lr_0 = 3.1729e-04
Loss = 9.3508e-03, PNorm = 177.2873, GNorm = 0.2424, lr_0 = 3.1707e-04
Loss = 1.1891e-02, PNorm = 177.3006, GNorm = 0.2007, lr_0 = 3.1686e-04
Loss = 1.0477e-02, PNorm = 177.3139, GNorm = 0.1299, lr_0 = 3.1664e-04
Loss = 1.6638e-02, PNorm = 177.3311, GNorm = 0.1607, lr_0 = 3.1642e-04
Loss = 1.2617e-02, PNorm = 177.3451, GNorm = 0.5941, lr_0 = 3.1621e-04
Validation mae = 0.121371
Epoch 16
Loss = 1.4119e-02, PNorm = 177.3524, GNorm = 0.2559, lr_0 = 3.1599e-04
Loss = 8.3146e-03, PNorm = 177.3609, GNorm = 0.1537, lr_0 = 3.1577e-04
Loss = 1.1106e-02, PNorm = 177.3702, GNorm = 0.3085, lr_0 = 3.1556e-04
Loss = 1.0033e-02, PNorm = 177.3809, GNorm = 0.3061, lr_0 = 3.1534e-04
Loss = 1.0295e-02, PNorm = 177.3937, GNorm = 0.1207, lr_0 = 3.1512e-04
Loss = 1.4360e-02, PNorm = 177.4050, GNorm = 0.1534, lr_0 = 3.1491e-04
Loss = 9.3960e-03, PNorm = 177.4150, GNorm = 0.1283, lr_0 = 3.1469e-04
Loss = 1.5721e-02, PNorm = 177.4249, GNorm = 0.2031, lr_0 = 3.1448e-04
Loss = 1.0507e-02, PNorm = 177.4361, GNorm = 0.2496, lr_0 = 3.1426e-04
Loss = 9.9043e-03, PNorm = 177.4450, GNorm = 0.5853, lr_0 = 3.1405e-04
Loss = 1.3018e-02, PNorm = 177.4566, GNorm = 0.2582, lr_0 = 3.1383e-04
Loss = 9.9661e-03, PNorm = 177.4669, GNorm = 0.1370, lr_0 = 3.1362e-04
Loss = 9.5672e-03, PNorm = 177.4767, GNorm = 0.2037, lr_0 = 3.1340e-04
Loss = 1.0563e-02, PNorm = 177.4869, GNorm = 0.2343, lr_0 = 3.1319e-04
Loss = 8.7805e-03, PNorm = 177.4987, GNorm = 0.2675, lr_0 = 3.1297e-04
Loss = 1.0095e-02, PNorm = 177.5093, GNorm = 0.1837, lr_0 = 3.1276e-04
Loss = 1.2821e-02, PNorm = 177.5187, GNorm = 0.2372, lr_0 = 3.1254e-04
Loss = 7.9726e-03, PNorm = 177.5277, GNorm = 0.3078, lr_0 = 3.1233e-04
Loss = 8.4593e-03, PNorm = 177.5381, GNorm = 0.2502, lr_0 = 3.1212e-04
Loss = 8.8242e-03, PNorm = 177.5463, GNorm = 0.2431, lr_0 = 3.1190e-04
Loss = 7.8277e-03, PNorm = 177.5569, GNorm = 0.2078, lr_0 = 3.1169e-04
Loss = 1.0918e-02, PNorm = 177.5660, GNorm = 0.2063, lr_0 = 3.1147e-04
Loss = 1.1794e-02, PNorm = 177.5764, GNorm = 0.1330, lr_0 = 3.1126e-04
Loss = 1.0048e-02, PNorm = 177.5821, GNorm = 0.3158, lr_0 = 3.1105e-04
Loss = 1.1145e-02, PNorm = 177.5890, GNorm = 0.5175, lr_0 = 3.1083e-04
Loss = 1.1524e-02, PNorm = 177.5968, GNorm = 0.3101, lr_0 = 3.1062e-04
Loss = 7.9896e-03, PNorm = 177.6099, GNorm = 0.3303, lr_0 = 3.1041e-04
Loss = 7.7321e-03, PNorm = 177.6221, GNorm = 0.2886, lr_0 = 3.1020e-04
Loss = 8.5105e-03, PNorm = 177.6328, GNorm = 0.1779, lr_0 = 3.0998e-04
Loss = 8.3358e-03, PNorm = 177.6458, GNorm = 0.2304, lr_0 = 3.0977e-04
Loss = 8.9776e-03, PNorm = 177.6568, GNorm = 0.1488, lr_0 = 3.0956e-04
Loss = 1.2646e-02, PNorm = 177.6654, GNorm = 0.1877, lr_0 = 3.0935e-04
Loss = 8.1696e-03, PNorm = 177.6769, GNorm = 0.1976, lr_0 = 3.0914e-04
Loss = 8.2487e-03, PNorm = 177.6883, GNorm = 0.1334, lr_0 = 3.0892e-04
Loss = 1.0033e-02, PNorm = 177.6979, GNorm = 0.1649, lr_0 = 3.0871e-04
Loss = 8.6585e-03, PNorm = 177.7090, GNorm = 0.1843, lr_0 = 3.0850e-04
Loss = 1.1552e-02, PNorm = 177.7178, GNorm = 0.2393, lr_0 = 3.0829e-04
Loss = 1.0610e-02, PNorm = 177.7272, GNorm = 0.4022, lr_0 = 3.0808e-04
Loss = 7.7879e-03, PNorm = 177.7361, GNorm = 0.1683, lr_0 = 3.0787e-04
Loss = 1.1137e-02, PNorm = 177.7452, GNorm = 0.2347, lr_0 = 3.0766e-04
Loss = 1.2117e-02, PNorm = 177.7544, GNorm = 0.1295, lr_0 = 3.0745e-04
Loss = 1.0101e-02, PNorm = 177.7634, GNorm = 0.2805, lr_0 = 3.0723e-04
Loss = 1.1956e-02, PNorm = 177.7723, GNorm = 0.1708, lr_0 = 3.0702e-04
Loss = 1.0336e-02, PNorm = 177.7841, GNorm = 0.4657, lr_0 = 3.0681e-04
Loss = 8.7314e-03, PNorm = 177.7930, GNorm = 0.3788, lr_0 = 3.0660e-04
Loss = 8.5337e-03, PNorm = 177.8031, GNorm = 0.1778, lr_0 = 3.0639e-04
Loss = 7.0902e-03, PNorm = 177.8143, GNorm = 0.1985, lr_0 = 3.0618e-04
Loss = 1.5669e-02, PNorm = 177.8217, GNorm = 0.1326, lr_0 = 3.0597e-04
Loss = 1.0333e-02, PNorm = 177.8299, GNorm = 0.2221, lr_0 = 3.0576e-04
Loss = 1.0131e-02, PNorm = 177.8410, GNorm = 0.4467, lr_0 = 3.0555e-04
Loss = 9.3691e-03, PNorm = 177.8546, GNorm = 0.1103, lr_0 = 3.0535e-04
Loss = 9.3268e-03, PNorm = 177.8670, GNorm = 0.1781, lr_0 = 3.0514e-04
Loss = 1.0290e-02, PNorm = 177.8755, GNorm = 0.2449, lr_0 = 3.0493e-04
Loss = 1.3837e-02, PNorm = 177.8823, GNorm = 0.2042, lr_0 = 3.0472e-04
Loss = 9.2709e-03, PNorm = 177.8923, GNorm = 0.1135, lr_0 = 3.0451e-04
Loss = 1.1073e-02, PNorm = 177.9012, GNorm = 0.1878, lr_0 = 3.0430e-04
Loss = 1.0766e-02, PNorm = 177.9106, GNorm = 0.2613, lr_0 = 3.0409e-04
Loss = 1.0729e-02, PNorm = 177.9206, GNorm = 0.2853, lr_0 = 3.0388e-04
Loss = 9.3864e-03, PNorm = 177.9309, GNorm = 0.2067, lr_0 = 3.0368e-04
Loss = 8.3098e-03, PNorm = 177.9446, GNorm = 0.1973, lr_0 = 3.0347e-04
Loss = 1.2300e-02, PNorm = 177.9578, GNorm = 0.2279, lr_0 = 3.0326e-04
Loss = 8.1034e-03, PNorm = 177.9712, GNorm = 0.2237, lr_0 = 3.0305e-04
Loss = 1.2394e-02, PNorm = 177.9838, GNorm = 0.2913, lr_0 = 3.0284e-04
Loss = 9.6358e-03, PNorm = 177.9933, GNorm = 0.1660, lr_0 = 3.0264e-04
Loss = 7.6099e-03, PNorm = 178.0053, GNorm = 0.3532, lr_0 = 3.0243e-04
Loss = 8.2506e-03, PNorm = 178.0141, GNorm = 0.2812, lr_0 = 3.0222e-04
Loss = 2.0039e-02, PNorm = 178.0244, GNorm = 0.1188, lr_0 = 3.0202e-04
Loss = 8.6351e-03, PNorm = 178.0352, GNorm = 0.1795, lr_0 = 3.0181e-04
Loss = 1.0193e-02, PNorm = 178.0472, GNorm = 0.4394, lr_0 = 3.0160e-04
Loss = 8.5154e-03, PNorm = 178.0622, GNorm = 0.2805, lr_0 = 3.0140e-04
Loss = 9.2402e-03, PNorm = 178.0737, GNorm = 0.2820, lr_0 = 3.0119e-04
Loss = 1.2683e-02, PNorm = 178.0879, GNorm = 0.1930, lr_0 = 3.0098e-04
Loss = 2.1038e-02, PNorm = 178.0968, GNorm = 0.2432, lr_0 = 3.0078e-04
Loss = 7.3226e-03, PNorm = 178.1097, GNorm = 0.2414, lr_0 = 3.0057e-04
Loss = 8.7878e-03, PNorm = 178.1231, GNorm = 0.1522, lr_0 = 3.0036e-04
Loss = 1.2797e-02, PNorm = 178.1355, GNorm = 0.2271, lr_0 = 3.0016e-04
Loss = 1.6214e-02, PNorm = 178.1493, GNorm = 0.2520, lr_0 = 2.9995e-04
Loss = 7.9214e-03, PNorm = 178.1604, GNorm = 0.1292, lr_0 = 2.9975e-04
Loss = 9.5527e-03, PNorm = 178.1699, GNorm = 0.3630, lr_0 = 2.9954e-04
Loss = 1.1530e-02, PNorm = 178.1847, GNorm = 0.2827, lr_0 = 2.9934e-04
Loss = 1.2505e-02, PNorm = 178.1933, GNorm = 0.3914, lr_0 = 2.9913e-04
Loss = 7.7391e-03, PNorm = 178.2027, GNorm = 0.1353, lr_0 = 2.9893e-04
Loss = 8.5214e-03, PNorm = 178.2116, GNorm = 0.1788, lr_0 = 2.9872e-04
Loss = 1.3766e-02, PNorm = 178.2222, GNorm = 0.2664, lr_0 = 2.9852e-04
Loss = 1.0349e-02, PNorm = 178.2337, GNorm = 0.1259, lr_0 = 2.9831e-04
Loss = 1.5603e-02, PNorm = 178.2477, GNorm = 0.2081, lr_0 = 2.9811e-04
Loss = 1.0683e-02, PNorm = 178.2609, GNorm = 0.3177, lr_0 = 2.9790e-04
Loss = 1.0884e-02, PNorm = 178.2751, GNorm = 0.1739, lr_0 = 2.9770e-04
Loss = 8.7598e-03, PNorm = 178.2845, GNorm = 0.1410, lr_0 = 2.9750e-04
Loss = 9.7632e-03, PNorm = 178.2955, GNorm = 0.1858, lr_0 = 2.9729e-04
Loss = 8.1864e-03, PNorm = 178.3074, GNorm = 0.3581, lr_0 = 2.9709e-04
Loss = 1.0516e-02, PNorm = 178.3192, GNorm = 0.1502, lr_0 = 2.9689e-04
Loss = 8.9722e-03, PNorm = 178.3296, GNorm = 0.1760, lr_0 = 2.9668e-04
Loss = 8.8273e-03, PNorm = 178.3399, GNorm = 0.3850, lr_0 = 2.9648e-04
Loss = 7.5653e-03, PNorm = 178.3510, GNorm = 0.1398, lr_0 = 2.9628e-04
Loss = 1.3563e-02, PNorm = 178.3624, GNorm = 0.2840, lr_0 = 2.9607e-04
Loss = 9.1257e-03, PNorm = 178.3748, GNorm = 0.2330, lr_0 = 2.9587e-04
Loss = 2.2859e-02, PNorm = 178.3851, GNorm = 0.1501, lr_0 = 2.9567e-04
Loss = 1.7913e-02, PNorm = 178.3968, GNorm = 0.2425, lr_0 = 2.9546e-04
Loss = 7.3525e-03, PNorm = 178.4080, GNorm = 0.1402, lr_0 = 2.9526e-04
Loss = 8.7790e-03, PNorm = 178.4200, GNorm = 0.1325, lr_0 = 2.9506e-04
Loss = 9.3749e-03, PNorm = 178.4308, GNorm = 0.0993, lr_0 = 2.9486e-04
Loss = 1.3189e-02, PNorm = 178.4410, GNorm = 0.6578, lr_0 = 2.9466e-04
Loss = 8.1239e-03, PNorm = 178.4529, GNorm = 0.1302, lr_0 = 2.9445e-04
Loss = 8.1800e-03, PNorm = 178.4628, GNorm = 0.3556, lr_0 = 2.9425e-04
Loss = 8.1244e-03, PNorm = 178.4762, GNorm = 0.1822, lr_0 = 2.9405e-04
Loss = 1.1524e-02, PNorm = 178.4910, GNorm = 0.7732, lr_0 = 2.9385e-04
Loss = 9.2797e-03, PNorm = 178.5009, GNorm = 0.1643, lr_0 = 2.9365e-04
Loss = 2.7312e-02, PNorm = 178.5163, GNorm = 0.2492, lr_0 = 2.9345e-04
Loss = 1.2241e-02, PNorm = 178.5269, GNorm = 0.1919, lr_0 = 2.9325e-04
Loss = 8.6087e-03, PNorm = 178.5378, GNorm = 0.2226, lr_0 = 2.9305e-04
Loss = 9.4219e-03, PNorm = 178.5510, GNorm = 0.2129, lr_0 = 2.9284e-04
Loss = 1.1700e-02, PNorm = 178.5618, GNorm = 0.4394, lr_0 = 2.9264e-04
Loss = 7.8460e-03, PNorm = 178.5721, GNorm = 0.2506, lr_0 = 2.9244e-04
Loss = 1.6201e-02, PNorm = 178.5805, GNorm = 0.2876, lr_0 = 2.9224e-04
Loss = 1.4408e-02, PNorm = 178.5933, GNorm = 0.3106, lr_0 = 2.9204e-04
Loss = 1.2602e-02, PNorm = 178.6065, GNorm = 0.1001, lr_0 = 2.9184e-04
Loss = 1.1961e-02, PNorm = 178.6173, GNorm = 0.3255, lr_0 = 2.9164e-04
Loss = 1.0918e-02, PNorm = 178.6291, GNorm = 0.1643, lr_0 = 2.9144e-04
Loss = 1.2910e-02, PNorm = 178.6389, GNorm = 0.1264, lr_0 = 2.9124e-04
Validation mae = 0.121216
Epoch 17
Loss = 8.6744e-03, PNorm = 178.6530, GNorm = 0.1610, lr_0 = 2.9104e-04
Loss = 9.8038e-03, PNorm = 178.6655, GNorm = 0.4179, lr_0 = 2.9084e-04
Loss = 9.0927e-03, PNorm = 178.6729, GNorm = 0.1994, lr_0 = 2.9065e-04
Loss = 8.3839e-03, PNorm = 178.6828, GNorm = 0.3027, lr_0 = 2.9045e-04
Loss = 1.1091e-02, PNorm = 178.6898, GNorm = 0.1289, lr_0 = 2.9025e-04
Loss = 8.6954e-03, PNorm = 178.6979, GNorm = 0.1610, lr_0 = 2.9005e-04
Loss = 9.7303e-03, PNorm = 178.7082, GNorm = 0.6885, lr_0 = 2.8985e-04
Loss = 9.3478e-03, PNorm = 178.7158, GNorm = 0.2413, lr_0 = 2.8965e-04
Loss = 7.8516e-03, PNorm = 178.7224, GNorm = 0.2084, lr_0 = 2.8945e-04
Loss = 1.0097e-02, PNorm = 178.7304, GNorm = 0.1524, lr_0 = 2.8925e-04
Loss = 7.6215e-03, PNorm = 178.7394, GNorm = 0.1738, lr_0 = 2.8906e-04
Loss = 7.1976e-03, PNorm = 178.7495, GNorm = 0.3115, lr_0 = 2.8886e-04
Loss = 1.0911e-02, PNorm = 178.7575, GNorm = 0.4427, lr_0 = 2.8866e-04
Loss = 1.0804e-02, PNorm = 178.7657, GNorm = 0.5496, lr_0 = 2.8846e-04
Loss = 7.4555e-03, PNorm = 178.7765, GNorm = 0.2057, lr_0 = 2.8826e-04
Loss = 9.4384e-03, PNorm = 178.7851, GNorm = 0.1523, lr_0 = 2.8807e-04
Loss = 9.7114e-03, PNorm = 178.7939, GNorm = 0.2506, lr_0 = 2.8787e-04
Loss = 7.8581e-03, PNorm = 178.8039, GNorm = 0.2081, lr_0 = 2.8767e-04
Loss = 9.5951e-03, PNorm = 178.8129, GNorm = 0.2591, lr_0 = 2.8748e-04
Loss = 1.0928e-02, PNorm = 178.8206, GNorm = 0.1744, lr_0 = 2.8728e-04
Loss = 6.3788e-03, PNorm = 178.8279, GNorm = 0.1857, lr_0 = 2.8708e-04
Loss = 1.5163e-02, PNorm = 178.8357, GNorm = 0.4157, lr_0 = 2.8689e-04
Loss = 7.4621e-03, PNorm = 178.8424, GNorm = 0.5698, lr_0 = 2.8669e-04
Loss = 7.3331e-03, PNorm = 178.8500, GNorm = 0.3540, lr_0 = 2.8649e-04
Loss = 1.0370e-02, PNorm = 178.8572, GNorm = 0.1910, lr_0 = 2.8630e-04
Loss = 7.2884e-03, PNorm = 178.8678, GNorm = 0.2358, lr_0 = 2.8610e-04
Loss = 9.0548e-03, PNorm = 178.8757, GNorm = 0.1201, lr_0 = 2.8590e-04
Loss = 6.4577e-03, PNorm = 178.8850, GNorm = 0.2120, lr_0 = 2.8571e-04
Loss = 1.0299e-02, PNorm = 178.8939, GNorm = 0.3463, lr_0 = 2.8551e-04
Loss = 1.3109e-02, PNorm = 178.9023, GNorm = 0.2555, lr_0 = 2.8532e-04
Loss = 1.7505e-02, PNorm = 178.9124, GNorm = 0.4103, lr_0 = 2.8512e-04
Loss = 2.4892e-02, PNorm = 178.9212, GNorm = 0.3482, lr_0 = 2.8493e-04
Loss = 1.5036e-02, PNorm = 178.9277, GNorm = 0.2594, lr_0 = 2.8473e-04
Loss = 1.0652e-02, PNorm = 178.9353, GNorm = 0.3566, lr_0 = 2.8454e-04
Loss = 9.0465e-03, PNorm = 178.9466, GNorm = 0.3101, lr_0 = 2.8434e-04
Loss = 1.1878e-02, PNorm = 178.9597, GNorm = 0.1397, lr_0 = 2.8415e-04
Loss = 1.3245e-02, PNorm = 178.9707, GNorm = 0.4089, lr_0 = 2.8395e-04
Loss = 8.7007e-03, PNorm = 178.9787, GNorm = 0.2130, lr_0 = 2.8376e-04
Loss = 7.1856e-03, PNorm = 178.9870, GNorm = 0.1437, lr_0 = 2.8356e-04
Loss = 6.7382e-03, PNorm = 178.9971, GNorm = 0.3072, lr_0 = 2.8337e-04
Loss = 7.6424e-03, PNorm = 179.0070, GNorm = 0.1665, lr_0 = 2.8317e-04
Loss = 8.9104e-03, PNorm = 179.0146, GNorm = 0.1818, lr_0 = 2.8298e-04
Loss = 1.5698e-02, PNorm = 179.0237, GNorm = 0.2105, lr_0 = 2.8279e-04
Loss = 1.0282e-02, PNorm = 179.0342, GNorm = 0.2052, lr_0 = 2.8259e-04
Loss = 6.9021e-03, PNorm = 179.0435, GNorm = 0.1591, lr_0 = 2.8240e-04
Loss = 8.0009e-03, PNorm = 179.0524, GNorm = 0.1326, lr_0 = 2.8221e-04
Loss = 1.0097e-02, PNorm = 179.0602, GNorm = 1.2025, lr_0 = 2.8201e-04
Loss = 1.0495e-02, PNorm = 179.0666, GNorm = 0.1379, lr_0 = 2.8182e-04
Loss = 8.6458e-03, PNorm = 179.0740, GNorm = 0.2017, lr_0 = 2.8163e-04
Loss = 8.4891e-03, PNorm = 179.0823, GNorm = 0.2196, lr_0 = 2.8143e-04
Loss = 7.6430e-03, PNorm = 179.0948, GNorm = 0.2539, lr_0 = 2.8124e-04
Loss = 7.0626e-03, PNorm = 179.1035, GNorm = 0.1367, lr_0 = 2.8105e-04
Loss = 1.0331e-02, PNorm = 179.1101, GNorm = 0.3001, lr_0 = 2.8085e-04
Loss = 7.0280e-03, PNorm = 179.1185, GNorm = 0.1195, lr_0 = 2.8066e-04
Loss = 8.6589e-03, PNorm = 179.1278, GNorm = 0.2033, lr_0 = 2.8047e-04
Loss = 7.4475e-03, PNorm = 179.1372, GNorm = 0.1512, lr_0 = 2.8028e-04
Loss = 1.3352e-02, PNorm = 179.1435, GNorm = 0.1529, lr_0 = 2.8009e-04
Loss = 8.1338e-03, PNorm = 179.1517, GNorm = 0.1518, lr_0 = 2.7989e-04
Loss = 9.3168e-03, PNorm = 179.1618, GNorm = 0.2224, lr_0 = 2.7970e-04
Loss = 1.4159e-02, PNorm = 179.1740, GNorm = 0.1191, lr_0 = 2.7951e-04
Loss = 6.4561e-03, PNorm = 179.1825, GNorm = 0.0860, lr_0 = 2.7932e-04
Loss = 6.8919e-03, PNorm = 179.1921, GNorm = 0.1502, lr_0 = 2.7913e-04
Loss = 7.7154e-03, PNorm = 179.2024, GNorm = 0.3278, lr_0 = 2.7894e-04
Loss = 1.4097e-02, PNorm = 179.2113, GNorm = 0.1563, lr_0 = 2.7875e-04
Loss = 8.4736e-03, PNorm = 179.2190, GNorm = 0.3486, lr_0 = 2.7855e-04
Loss = 8.2297e-03, PNorm = 179.2274, GNorm = 0.3224, lr_0 = 2.7836e-04
Loss = 9.6098e-03, PNorm = 179.2352, GNorm = 0.1335, lr_0 = 2.7817e-04
Loss = 8.0348e-03, PNorm = 179.2446, GNorm = 0.1893, lr_0 = 2.7798e-04
Loss = 9.0469e-03, PNorm = 179.2542, GNorm = 0.3022, lr_0 = 2.7779e-04
Loss = 7.2464e-03, PNorm = 179.2663, GNorm = 0.2043, lr_0 = 2.7760e-04
Loss = 7.6190e-03, PNorm = 179.2779, GNorm = 0.4054, lr_0 = 2.7741e-04
Loss = 6.6873e-03, PNorm = 179.2856, GNorm = 0.2769, lr_0 = 2.7722e-04
Loss = 6.3635e-03, PNorm = 179.2935, GNorm = 0.2096, lr_0 = 2.7703e-04
Loss = 7.9700e-03, PNorm = 179.3045, GNorm = 0.1949, lr_0 = 2.7684e-04
Loss = 6.5366e-03, PNorm = 179.3154, GNorm = 0.1992, lr_0 = 2.7665e-04
Loss = 9.9772e-03, PNorm = 179.3239, GNorm = 0.2482, lr_0 = 2.7646e-04
Loss = 8.7703e-03, PNorm = 179.3306, GNorm = 0.2518, lr_0 = 2.7627e-04
Loss = 8.2207e-03, PNorm = 179.3393, GNorm = 0.2719, lr_0 = 2.7608e-04
Loss = 7.9860e-03, PNorm = 179.3473, GNorm = 0.0974, lr_0 = 2.7590e-04
Loss = 9.0583e-03, PNorm = 179.3560, GNorm = 0.1131, lr_0 = 2.7571e-04
Loss = 6.3726e-03, PNorm = 179.3658, GNorm = 0.1694, lr_0 = 2.7552e-04
Loss = 1.2381e-02, PNorm = 179.3786, GNorm = 0.3219, lr_0 = 2.7533e-04
Loss = 9.7555e-03, PNorm = 179.3899, GNorm = 0.2787, lr_0 = 2.7514e-04
Loss = 8.7921e-03, PNorm = 179.3947, GNorm = 0.1570, lr_0 = 2.7495e-04
Loss = 8.4650e-03, PNorm = 179.4051, GNorm = 0.1997, lr_0 = 2.7476e-04
Loss = 9.1978e-03, PNorm = 179.4164, GNorm = 0.3122, lr_0 = 2.7457e-04
Loss = 7.3844e-03, PNorm = 179.4252, GNorm = 0.2548, lr_0 = 2.7439e-04
Loss = 7.0806e-03, PNorm = 179.4345, GNorm = 0.2508, lr_0 = 2.7420e-04
Loss = 6.5155e-03, PNorm = 179.4449, GNorm = 0.1931, lr_0 = 2.7401e-04
Loss = 9.7311e-03, PNorm = 179.4541, GNorm = 0.1707, lr_0 = 2.7382e-04
Loss = 6.4720e-03, PNorm = 179.4626, GNorm = 0.0862, lr_0 = 2.7364e-04
Loss = 9.4586e-03, PNorm = 179.4706, GNorm = 0.1060, lr_0 = 2.7345e-04
Loss = 7.0262e-03, PNorm = 179.4783, GNorm = 0.1942, lr_0 = 2.7326e-04
Loss = 9.0051e-03, PNorm = 179.4888, GNorm = 0.1327, lr_0 = 2.7307e-04
Loss = 9.5535e-03, PNorm = 179.4987, GNorm = 0.2701, lr_0 = 2.7289e-04
Loss = 9.7181e-03, PNorm = 179.5092, GNorm = 0.1540, lr_0 = 2.7270e-04
Loss = 7.4562e-03, PNorm = 179.5186, GNorm = 0.1355, lr_0 = 2.7251e-04
Loss = 7.3375e-03, PNorm = 179.5294, GNorm = 0.1421, lr_0 = 2.7233e-04
Loss = 8.9677e-03, PNorm = 179.5399, GNorm = 0.1791, lr_0 = 2.7214e-04
Loss = 7.8448e-03, PNorm = 179.5522, GNorm = 0.1244, lr_0 = 2.7195e-04
Loss = 1.5789e-02, PNorm = 179.5624, GNorm = 0.2282, lr_0 = 2.7177e-04
Loss = 6.8031e-03, PNorm = 179.5692, GNorm = 0.0971, lr_0 = 2.7158e-04
Loss = 7.2688e-03, PNorm = 179.5792, GNorm = 0.1335, lr_0 = 2.7139e-04
Loss = 1.2049e-02, PNorm = 179.5878, GNorm = 0.3812, lr_0 = 2.7121e-04
Loss = 1.4216e-02, PNorm = 179.5946, GNorm = 0.2750, lr_0 = 2.7102e-04
Loss = 1.5189e-02, PNorm = 179.6057, GNorm = 0.1217, lr_0 = 2.7084e-04
Loss = 7.9133e-03, PNorm = 179.6186, GNorm = 0.1890, lr_0 = 2.7065e-04
Loss = 1.2366e-02, PNorm = 179.6310, GNorm = 0.1704, lr_0 = 2.7047e-04
Loss = 9.7019e-03, PNorm = 179.6427, GNorm = 0.1533, lr_0 = 2.7028e-04
Loss = 7.9822e-03, PNorm = 179.6485, GNorm = 0.1848, lr_0 = 2.7010e-04
Loss = 1.1650e-02, PNorm = 179.6585, GNorm = 0.1504, lr_0 = 2.6991e-04
Loss = 1.5972e-02, PNorm = 179.6663, GNorm = 0.2146, lr_0 = 2.6973e-04
Loss = 8.7308e-03, PNorm = 179.6763, GNorm = 0.6836, lr_0 = 2.6954e-04
Loss = 1.0756e-02, PNorm = 179.6860, GNorm = 0.3862, lr_0 = 2.6936e-04
Loss = 7.6577e-03, PNorm = 179.6968, GNorm = 0.0940, lr_0 = 2.6917e-04
Loss = 9.2131e-03, PNorm = 179.7047, GNorm = 0.2270, lr_0 = 2.6899e-04
Loss = 8.8909e-03, PNorm = 179.7136, GNorm = 0.5483, lr_0 = 2.6880e-04
Loss = 6.1360e-03, PNorm = 179.7247, GNorm = 0.1091, lr_0 = 2.6862e-04
Loss = 1.0064e-02, PNorm = 179.7362, GNorm = 0.1602, lr_0 = 2.6844e-04
Loss = 1.0078e-02, PNorm = 179.7458, GNorm = 0.2080, lr_0 = 2.6825e-04
Validation mae = 0.120816
Epoch 18
Loss = 6.2508e-03, PNorm = 179.7534, GNorm = 0.1217, lr_0 = 2.6807e-04
Loss = 6.2678e-03, PNorm = 179.7597, GNorm = 0.3740, lr_0 = 2.6788e-04
Loss = 5.6273e-03, PNorm = 179.7669, GNorm = 0.1708, lr_0 = 2.6770e-04
Loss = 5.9054e-03, PNorm = 179.7729, GNorm = 0.2507, lr_0 = 2.6752e-04
Loss = 9.1516e-03, PNorm = 179.7797, GNorm = 0.1472, lr_0 = 2.6733e-04
Loss = 6.2807e-03, PNorm = 179.7869, GNorm = 0.2510, lr_0 = 2.6715e-04
Loss = 6.6588e-03, PNorm = 179.7947, GNorm = 0.1805, lr_0 = 2.6697e-04
Loss = 6.9241e-03, PNorm = 179.8032, GNorm = 0.2949, lr_0 = 2.6678e-04
Loss = 6.8622e-03, PNorm = 179.8107, GNorm = 0.1677, lr_0 = 2.6660e-04
Loss = 1.0978e-02, PNorm = 179.8171, GNorm = 0.2414, lr_0 = 2.6642e-04
Loss = 6.4944e-03, PNorm = 179.8248, GNorm = 0.1361, lr_0 = 2.6624e-04
Loss = 5.9857e-03, PNorm = 179.8296, GNorm = 0.1851, lr_0 = 2.6605e-04
Loss = 7.9684e-03, PNorm = 179.8347, GNorm = 0.1818, lr_0 = 2.6587e-04
Loss = 6.7887e-03, PNorm = 179.8429, GNorm = 0.1875, lr_0 = 2.6569e-04
Loss = 8.0712e-03, PNorm = 179.8496, GNorm = 0.2348, lr_0 = 2.6551e-04
Loss = 1.0474e-02, PNorm = 179.8583, GNorm = 0.1202, lr_0 = 2.6533e-04
Loss = 6.9391e-03, PNorm = 179.8672, GNorm = 0.3188, lr_0 = 2.6514e-04
Loss = 9.0365e-03, PNorm = 179.8773, GNorm = 0.2792, lr_0 = 2.6496e-04
Loss = 6.1222e-03, PNorm = 179.8859, GNorm = 0.1412, lr_0 = 2.6478e-04
Loss = 5.6922e-03, PNorm = 179.8953, GNorm = 0.1453, lr_0 = 2.6460e-04
Loss = 6.7353e-03, PNorm = 179.9039, GNorm = 0.3428, lr_0 = 2.6442e-04
Loss = 8.3464e-03, PNorm = 179.9099, GNorm = 0.1410, lr_0 = 2.6424e-04
Loss = 7.3612e-03, PNorm = 179.9176, GNorm = 0.1315, lr_0 = 2.6406e-04
Loss = 8.7687e-03, PNorm = 179.9276, GNorm = 0.3431, lr_0 = 2.6388e-04
Loss = 6.0301e-03, PNorm = 179.9356, GNorm = 0.2072, lr_0 = 2.6369e-04
Loss = 1.0214e-02, PNorm = 179.9448, GNorm = 0.9272, lr_0 = 2.6351e-04
Loss = 8.3596e-03, PNorm = 179.9492, GNorm = 0.1236, lr_0 = 2.6333e-04
Loss = 9.7850e-03, PNorm = 179.9569, GNorm = 0.1197, lr_0 = 2.6315e-04
Loss = 6.9329e-03, PNorm = 179.9637, GNorm = 0.2160, lr_0 = 2.6297e-04
Loss = 6.1758e-03, PNorm = 179.9707, GNorm = 0.1283, lr_0 = 2.6279e-04
Loss = 7.2032e-03, PNorm = 179.9773, GNorm = 0.2007, lr_0 = 2.6261e-04
Loss = 6.1585e-03, PNorm = 179.9840, GNorm = 0.1513, lr_0 = 2.6243e-04
Loss = 1.2170e-02, PNorm = 179.9897, GNorm = 0.2674, lr_0 = 2.6225e-04
Loss = 7.0846e-03, PNorm = 179.9990, GNorm = 0.4967, lr_0 = 2.6207e-04
Loss = 7.4129e-03, PNorm = 180.0078, GNorm = 0.1303, lr_0 = 2.6189e-04
Loss = 5.7616e-03, PNorm = 180.0173, GNorm = 0.1844, lr_0 = 2.6171e-04
Loss = 9.4499e-03, PNorm = 180.0269, GNorm = 0.2165, lr_0 = 2.6153e-04
Loss = 7.4877e-03, PNorm = 180.0304, GNorm = 0.1317, lr_0 = 2.6136e-04
Loss = 7.9821e-03, PNorm = 180.0378, GNorm = 0.3390, lr_0 = 2.6118e-04
Loss = 7.2973e-03, PNorm = 180.0442, GNorm = 0.0925, lr_0 = 2.6100e-04
Loss = 7.5030e-03, PNorm = 180.0503, GNorm = 0.1490, lr_0 = 2.6082e-04
Loss = 8.2615e-03, PNorm = 180.0594, GNorm = 0.4945, lr_0 = 2.6064e-04
Loss = 7.2153e-03, PNorm = 180.0671, GNorm = 0.2184, lr_0 = 2.6046e-04
Loss = 6.2051e-03, PNorm = 180.0745, GNorm = 0.1415, lr_0 = 2.6028e-04
Loss = 6.3307e-03, PNorm = 180.0823, GNorm = 0.1946, lr_0 = 2.6011e-04
Loss = 6.2734e-03, PNorm = 180.0901, GNorm = 0.3020, lr_0 = 2.5993e-04
Loss = 9.7087e-03, PNorm = 180.0991, GNorm = 0.2343, lr_0 = 2.5975e-04
Loss = 7.7932e-03, PNorm = 180.1067, GNorm = 0.2418, lr_0 = 2.5957e-04
Loss = 9.7165e-03, PNorm = 180.1166, GNorm = 0.1795, lr_0 = 2.5939e-04
Loss = 9.4982e-03, PNorm = 180.1266, GNorm = 0.3321, lr_0 = 2.5922e-04
Loss = 4.8148e-03, PNorm = 180.1334, GNorm = 0.2245, lr_0 = 2.5904e-04
Loss = 1.9989e-02, PNorm = 180.1408, GNorm = 2.3507, lr_0 = 2.5886e-04
Loss = 9.3731e-03, PNorm = 180.1493, GNorm = 0.4326, lr_0 = 2.5868e-04
Loss = 9.0516e-03, PNorm = 180.1558, GNorm = 0.1323, lr_0 = 2.5851e-04
Loss = 8.0591e-03, PNorm = 180.1640, GNorm = 0.1587, lr_0 = 2.5833e-04
Loss = 8.4243e-03, PNorm = 180.1698, GNorm = 0.1486, lr_0 = 2.5815e-04
Loss = 6.8058e-03, PNorm = 180.1765, GNorm = 0.3035, lr_0 = 2.5797e-04
Loss = 7.4571e-03, PNorm = 180.1867, GNorm = 0.3453, lr_0 = 2.5780e-04
Loss = 8.5413e-03, PNorm = 180.1909, GNorm = 0.2302, lr_0 = 2.5762e-04
Loss = 1.0184e-02, PNorm = 180.1969, GNorm = 0.0999, lr_0 = 2.5745e-04
Loss = 8.8014e-03, PNorm = 180.2038, GNorm = 0.1178, lr_0 = 2.5727e-04
Loss = 8.9065e-03, PNorm = 180.2108, GNorm = 0.1566, lr_0 = 2.5709e-04
Loss = 7.2618e-03, PNorm = 180.2187, GNorm = 0.1455, lr_0 = 2.5692e-04
Loss = 7.5387e-03, PNorm = 180.2296, GNorm = 0.1750, lr_0 = 2.5674e-04
Loss = 7.3650e-03, PNorm = 180.2380, GNorm = 0.1751, lr_0 = 2.5656e-04
Loss = 6.7174e-03, PNorm = 180.2460, GNorm = 0.4197, lr_0 = 2.5639e-04
Loss = 1.2243e-02, PNorm = 180.2534, GNorm = 0.1464, lr_0 = 2.5621e-04
Loss = 7.6707e-03, PNorm = 180.2614, GNorm = 0.1852, lr_0 = 2.5604e-04
Loss = 7.1214e-03, PNorm = 180.2691, GNorm = 0.6081, lr_0 = 2.5586e-04
Loss = 5.4036e-03, PNorm = 180.2759, GNorm = 0.3482, lr_0 = 2.5569e-04
Loss = 8.4425e-03, PNorm = 180.2815, GNorm = 0.2897, lr_0 = 2.5551e-04
Loss = 1.1631e-02, PNorm = 180.2891, GNorm = 0.3123, lr_0 = 2.5534e-04
Loss = 8.8652e-03, PNorm = 180.2966, GNorm = 0.1570, lr_0 = 2.5516e-04
Loss = 8.9767e-03, PNorm = 180.3054, GNorm = 0.3026, lr_0 = 2.5499e-04
Loss = 8.5749e-03, PNorm = 180.3128, GNorm = 0.3355, lr_0 = 2.5481e-04
Loss = 8.5918e-03, PNorm = 180.3199, GNorm = 0.2669, lr_0 = 2.5464e-04
Loss = 6.8639e-03, PNorm = 180.3288, GNorm = 0.2256, lr_0 = 2.5446e-04
Loss = 7.1143e-03, PNorm = 180.3361, GNorm = 0.1593, lr_0 = 2.5429e-04
Loss = 1.0820e-02, PNorm = 180.3436, GNorm = 0.1611, lr_0 = 2.5411e-04
Loss = 1.0775e-02, PNorm = 180.3493, GNorm = 0.1893, lr_0 = 2.5394e-04
Loss = 6.1294e-03, PNorm = 180.3553, GNorm = 0.1351, lr_0 = 2.5377e-04
Loss = 8.3670e-03, PNorm = 180.3623, GNorm = 0.2037, lr_0 = 2.5359e-04
Loss = 1.5042e-02, PNorm = 180.3695, GNorm = 0.1120, lr_0 = 2.5342e-04
Loss = 1.7651e-02, PNorm = 180.3798, GNorm = 0.4153, lr_0 = 2.5325e-04
Loss = 7.4669e-03, PNorm = 180.3892, GNorm = 0.3221, lr_0 = 2.5307e-04
Loss = 1.0453e-02, PNorm = 180.3994, GNorm = 0.1931, lr_0 = 2.5290e-04
Loss = 9.3652e-03, PNorm = 180.4096, GNorm = 0.2920, lr_0 = 2.5273e-04
Loss = 6.1934e-03, PNorm = 180.4189, GNorm = 0.2475, lr_0 = 2.5255e-04
Loss = 6.3203e-03, PNorm = 180.4299, GNorm = 0.2693, lr_0 = 2.5238e-04
Loss = 5.0700e-03, PNorm = 180.4383, GNorm = 0.1462, lr_0 = 2.5221e-04
Loss = 1.5525e-02, PNorm = 180.4458, GNorm = 0.2211, lr_0 = 2.5203e-04
Loss = 7.1173e-03, PNorm = 180.4540, GNorm = 0.2875, lr_0 = 2.5186e-04
Loss = 6.0022e-03, PNorm = 180.4592, GNorm = 0.1014, lr_0 = 2.5169e-04
Loss = 7.7470e-03, PNorm = 180.4653, GNorm = 0.2023, lr_0 = 2.5152e-04
Loss = 6.7768e-03, PNorm = 180.4729, GNorm = 0.2003, lr_0 = 2.5134e-04
Loss = 1.1423e-02, PNorm = 180.4813, GNorm = 0.2443, lr_0 = 2.5117e-04
Loss = 6.8421e-03, PNorm = 180.4888, GNorm = 0.1613, lr_0 = 2.5100e-04
Loss = 6.4037e-03, PNorm = 180.4971, GNorm = 0.1396, lr_0 = 2.5083e-04
Loss = 5.2773e-03, PNorm = 180.5054, GNorm = 0.1348, lr_0 = 2.5066e-04
Loss = 8.5255e-03, PNorm = 180.5138, GNorm = 0.1944, lr_0 = 2.5048e-04
Loss = 8.0314e-03, PNorm = 180.5228, GNorm = 0.1426, lr_0 = 2.5031e-04
Loss = 1.0972e-02, PNorm = 180.5334, GNorm = 0.2431, lr_0 = 2.5014e-04
Loss = 6.9091e-03, PNorm = 180.5427, GNorm = 0.2304, lr_0 = 2.4997e-04
Loss = 1.1945e-02, PNorm = 180.5508, GNorm = 0.1359, lr_0 = 2.4980e-04
Loss = 6.4774e-03, PNorm = 180.5586, GNorm = 0.2960, lr_0 = 2.4963e-04
Loss = 8.0825e-03, PNorm = 180.5663, GNorm = 0.1018, lr_0 = 2.4946e-04
Loss = 7.4532e-03, PNorm = 180.5753, GNorm = 0.2890, lr_0 = 2.4929e-04
Loss = 5.8279e-03, PNorm = 180.5823, GNorm = 0.2471, lr_0 = 2.4911e-04
Loss = 7.7782e-03, PNorm = 180.5888, GNorm = 0.1910, lr_0 = 2.4894e-04
Loss = 7.3845e-03, PNorm = 180.5971, GNorm = 0.1172, lr_0 = 2.4877e-04
Loss = 8.6987e-03, PNorm = 180.6046, GNorm = 0.1917, lr_0 = 2.4860e-04
Loss = 8.7524e-03, PNorm = 180.6106, GNorm = 0.0763, lr_0 = 2.4843e-04
Loss = 7.1396e-03, PNorm = 180.6165, GNorm = 0.2271, lr_0 = 2.4826e-04
Loss = 7.6170e-03, PNorm = 180.6228, GNorm = 0.1410, lr_0 = 2.4809e-04
Loss = 1.0885e-02, PNorm = 180.6304, GNorm = 0.1346, lr_0 = 2.4792e-04
Loss = 8.5330e-03, PNorm = 180.6388, GNorm = 0.3883, lr_0 = 2.4775e-04
Loss = 8.4064e-03, PNorm = 180.6457, GNorm = 0.1884, lr_0 = 2.4758e-04
Loss = 5.6980e-03, PNorm = 180.6519, GNorm = 0.1280, lr_0 = 2.4741e-04
Loss = 6.9371e-03, PNorm = 180.6575, GNorm = 0.1539, lr_0 = 2.4724e-04
Loss = 1.5148e-02, PNorm = 180.6686, GNorm = 0.1145, lr_0 = 2.4707e-04
Validation mae = 0.121045
Epoch 19
Loss = 8.2204e-03, PNorm = 180.6775, GNorm = 0.4034, lr_0 = 2.4690e-04
Loss = 4.5396e-03, PNorm = 180.6856, GNorm = 0.1996, lr_0 = 2.4674e-04
Loss = 8.4049e-03, PNorm = 180.6916, GNorm = 0.1948, lr_0 = 2.4657e-04
Loss = 7.9002e-03, PNorm = 180.6957, GNorm = 0.1075, lr_0 = 2.4640e-04
Loss = 5.3321e-03, PNorm = 180.7001, GNorm = 0.1252, lr_0 = 2.4623e-04
Loss = 7.3851e-03, PNorm = 180.7064, GNorm = 0.1288, lr_0 = 2.4606e-04
Loss = 5.6158e-03, PNorm = 180.7118, GNorm = 0.2710, lr_0 = 2.4589e-04
Loss = 7.0327e-03, PNorm = 180.7186, GNorm = 0.1834, lr_0 = 2.4572e-04
Loss = 7.7069e-03, PNorm = 180.7254, GNorm = 0.4287, lr_0 = 2.4556e-04
Loss = 5.7372e-03, PNorm = 180.7319, GNorm = 0.1695, lr_0 = 2.4539e-04
Loss = 9.1525e-03, PNorm = 180.7380, GNorm = 0.4218, lr_0 = 2.4522e-04
Loss = 8.4057e-03, PNorm = 180.7454, GNorm = 0.1719, lr_0 = 2.4505e-04
Loss = 7.4810e-03, PNorm = 180.7505, GNorm = 0.4338, lr_0 = 2.4488e-04
Loss = 6.9865e-03, PNorm = 180.7559, GNorm = 0.2470, lr_0 = 2.4472e-04
Loss = 6.1662e-03, PNorm = 180.7606, GNorm = 0.2280, lr_0 = 2.4455e-04
Loss = 4.5712e-03, PNorm = 180.7649, GNorm = 0.1866, lr_0 = 2.4438e-04
Loss = 4.7938e-03, PNorm = 180.7702, GNorm = 0.2127, lr_0 = 2.4421e-04
Loss = 8.5314e-03, PNorm = 180.7757, GNorm = 0.1347, lr_0 = 2.4405e-04
Loss = 6.3706e-03, PNorm = 180.7818, GNorm = 0.1889, lr_0 = 2.4388e-04
Loss = 8.0105e-03, PNorm = 180.7883, GNorm = 0.2775, lr_0 = 2.4371e-04
Loss = 5.1644e-03, PNorm = 180.7931, GNorm = 0.2139, lr_0 = 2.4354e-04
Loss = 5.1808e-03, PNorm = 180.7983, GNorm = 0.1485, lr_0 = 2.4338e-04
Loss = 5.1809e-03, PNorm = 180.8033, GNorm = 0.1467, lr_0 = 2.4321e-04
Loss = 1.0856e-02, PNorm = 180.8068, GNorm = 0.4194, lr_0 = 2.4304e-04
Loss = 5.1524e-03, PNorm = 180.8135, GNorm = 0.1417, lr_0 = 2.4288e-04
Loss = 1.0798e-02, PNorm = 180.8223, GNorm = 0.4088, lr_0 = 2.4271e-04
Loss = 5.4107e-03, PNorm = 180.8295, GNorm = 0.1572, lr_0 = 2.4254e-04
Loss = 6.1550e-03, PNorm = 180.8358, GNorm = 0.1729, lr_0 = 2.4238e-04
Loss = 5.6296e-03, PNorm = 180.8419, GNorm = 0.1840, lr_0 = 2.4221e-04
Loss = 9.7635e-03, PNorm = 180.8481, GNorm = 0.2311, lr_0 = 2.4205e-04
Loss = 6.5344e-03, PNorm = 180.8522, GNorm = 0.2035, lr_0 = 2.4188e-04
Loss = 4.6319e-03, PNorm = 180.8570, GNorm = 0.1635, lr_0 = 2.4171e-04
Loss = 5.1022e-03, PNorm = 180.8609, GNorm = 0.2121, lr_0 = 2.4155e-04
Loss = 7.6894e-03, PNorm = 180.8678, GNorm = 0.1933, lr_0 = 2.4138e-04
Loss = 5.4185e-03, PNorm = 180.8750, GNorm = 0.1417, lr_0 = 2.4122e-04
Loss = 8.2756e-03, PNorm = 180.8815, GNorm = 0.2697, lr_0 = 2.4105e-04
Loss = 4.7521e-03, PNorm = 180.8891, GNorm = 0.1168, lr_0 = 2.4089e-04
Loss = 4.2523e-03, PNorm = 180.8951, GNorm = 0.2349, lr_0 = 2.4072e-04
Loss = 8.2398e-03, PNorm = 180.9002, GNorm = 0.1178, lr_0 = 2.4056e-04
Loss = 7.3991e-03, PNorm = 180.9078, GNorm = 0.1890, lr_0 = 2.4039e-04
Loss = 4.7708e-03, PNorm = 180.9137, GNorm = 0.2361, lr_0 = 2.4023e-04
Loss = 5.0935e-03, PNorm = 180.9196, GNorm = 0.2120, lr_0 = 2.4006e-04
Loss = 8.4342e-03, PNorm = 180.9259, GNorm = 0.3314, lr_0 = 2.3990e-04
Loss = 8.5780e-03, PNorm = 180.9289, GNorm = 0.1099, lr_0 = 2.3974e-04
Loss = 5.6047e-03, PNorm = 180.9341, GNorm = 0.0956, lr_0 = 2.3957e-04
Loss = 7.1171e-03, PNorm = 180.9396, GNorm = 0.1605, lr_0 = 2.3941e-04
Loss = 6.1500e-03, PNorm = 180.9450, GNorm = 0.1364, lr_0 = 2.3924e-04
Loss = 6.4280e-03, PNorm = 180.9531, GNorm = 0.0977, lr_0 = 2.3908e-04
Loss = 1.1040e-02, PNorm = 180.9584, GNorm = 0.2918, lr_0 = 2.3892e-04
Loss = 7.5463e-03, PNorm = 180.9623, GNorm = 0.1980, lr_0 = 2.3875e-04
Loss = 8.2991e-03, PNorm = 180.9652, GNorm = 0.2178, lr_0 = 2.3859e-04
Loss = 5.5969e-03, PNorm = 180.9710, GNorm = 0.3535, lr_0 = 2.3842e-04
Loss = 6.0469e-03, PNorm = 180.9797, GNorm = 0.2043, lr_0 = 2.3826e-04
Loss = 5.9122e-03, PNorm = 180.9864, GNorm = 0.1481, lr_0 = 2.3810e-04
Loss = 4.9973e-03, PNorm = 180.9949, GNorm = 0.0956, lr_0 = 2.3794e-04
Loss = 5.4851e-03, PNorm = 181.0021, GNorm = 0.1965, lr_0 = 2.3777e-04
Loss = 5.0093e-03, PNorm = 181.0066, GNorm = 0.1416, lr_0 = 2.3761e-04
Loss = 5.6142e-03, PNorm = 181.0116, GNorm = 0.1441, lr_0 = 2.3745e-04
Loss = 8.9430e-03, PNorm = 181.0172, GNorm = 0.2519, lr_0 = 2.3728e-04
Loss = 7.0730e-03, PNorm = 181.0233, GNorm = 0.1419, lr_0 = 2.3712e-04
Loss = 8.2630e-03, PNorm = 181.0281, GNorm = 0.1403, lr_0 = 2.3696e-04
Loss = 6.2462e-03, PNorm = 181.0366, GNorm = 0.2149, lr_0 = 2.3680e-04
Loss = 1.7615e-02, PNorm = 181.0444, GNorm = 0.3851, lr_0 = 2.3663e-04
Loss = 4.3744e-03, PNorm = 181.0491, GNorm = 0.2313, lr_0 = 2.3647e-04
Loss = 1.5594e-02, PNorm = 181.0482, GNorm = 0.6357, lr_0 = 2.3631e-04
Loss = 6.5594e-03, PNorm = 181.0586, GNorm = 0.1815, lr_0 = 2.3615e-04
Loss = 1.0823e-02, PNorm = 181.0670, GNorm = 0.6535, lr_0 = 2.3599e-04
Loss = 5.2956e-03, PNorm = 181.0751, GNorm = 0.1156, lr_0 = 2.3582e-04
Loss = 6.8704e-03, PNorm = 181.0835, GNorm = 0.0992, lr_0 = 2.3566e-04
Loss = 4.3077e-03, PNorm = 181.0901, GNorm = 0.1825, lr_0 = 2.3550e-04
Loss = 6.5131e-03, PNorm = 181.0956, GNorm = 0.1223, lr_0 = 2.3534e-04
Loss = 8.0363e-03, PNorm = 181.1030, GNorm = 0.1671, lr_0 = 2.3518e-04
Loss = 7.5745e-03, PNorm = 181.1092, GNorm = 0.1581, lr_0 = 2.3502e-04
Loss = 7.4792e-03, PNorm = 181.1151, GNorm = 0.2382, lr_0 = 2.3486e-04
Loss = 8.4225e-03, PNorm = 181.1231, GNorm = 0.4474, lr_0 = 2.3470e-04
Loss = 1.0591e-02, PNorm = 181.1296, GNorm = 0.1060, lr_0 = 2.3454e-04
Loss = 6.7184e-03, PNorm = 181.1388, GNorm = 0.1540, lr_0 = 2.3437e-04
Loss = 4.8586e-03, PNorm = 181.1473, GNorm = 0.1511, lr_0 = 2.3421e-04
Loss = 4.5109e-03, PNorm = 181.1544, GNorm = 0.3241, lr_0 = 2.3405e-04
Loss = 8.6750e-03, PNorm = 181.1609, GNorm = 0.2668, lr_0 = 2.3389e-04
Loss = 5.7154e-03, PNorm = 181.1686, GNorm = 0.2365, lr_0 = 2.3373e-04
Loss = 6.8475e-03, PNorm = 181.1745, GNorm = 0.2439, lr_0 = 2.3357e-04
Loss = 5.9056e-03, PNorm = 181.1803, GNorm = 0.1712, lr_0 = 2.3341e-04
Loss = 9.9174e-03, PNorm = 181.1860, GNorm = 0.1490, lr_0 = 2.3325e-04
Loss = 8.8253e-03, PNorm = 181.1933, GNorm = 0.2363, lr_0 = 2.3309e-04
Loss = 6.4892e-03, PNorm = 181.2023, GNorm = 0.1865, lr_0 = 2.3293e-04
Loss = 5.0167e-03, PNorm = 181.2095, GNorm = 0.2081, lr_0 = 2.3277e-04
Loss = 4.8670e-03, PNorm = 181.2160, GNorm = 0.2097, lr_0 = 2.3261e-04
Loss = 1.0360e-02, PNorm = 181.2216, GNorm = 0.1867, lr_0 = 2.3246e-04
Loss = 7.6197e-03, PNorm = 181.2268, GNorm = 0.1316, lr_0 = 2.3230e-04
Loss = 6.1495e-03, PNorm = 181.2338, GNorm = 0.0973, lr_0 = 2.3214e-04
Loss = 5.5424e-03, PNorm = 181.2398, GNorm = 0.0849, lr_0 = 2.3198e-04
Loss = 5.8306e-03, PNorm = 181.2457, GNorm = 0.1904, lr_0 = 2.3182e-04
Loss = 8.3858e-03, PNorm = 181.2531, GNorm = 0.6110, lr_0 = 2.3166e-04
Loss = 1.0220e-02, PNorm = 181.2592, GNorm = 0.0892, lr_0 = 2.3150e-04
Loss = 4.5338e-03, PNorm = 181.2660, GNorm = 0.2982, lr_0 = 2.3134e-04
Loss = 6.8085e-03, PNorm = 181.2732, GNorm = 0.1312, lr_0 = 2.3118e-04
Loss = 5.0209e-03, PNorm = 181.2817, GNorm = 0.0781, lr_0 = 2.3103e-04
Loss = 8.8633e-03, PNorm = 181.2898, GNorm = 0.2406, lr_0 = 2.3087e-04
Loss = 1.0456e-02, PNorm = 181.2914, GNorm = 0.4382, lr_0 = 2.3071e-04
Loss = 6.2865e-03, PNorm = 181.2993, GNorm = 0.1006, lr_0 = 2.3055e-04
Loss = 7.4543e-03, PNorm = 181.3065, GNorm = 0.1547, lr_0 = 2.3039e-04
Loss = 1.1253e-02, PNorm = 181.3132, GNorm = 0.1636, lr_0 = 2.3024e-04
Loss = 1.0151e-02, PNorm = 181.3225, GNorm = 0.3037, lr_0 = 2.3008e-04
Loss = 8.0838e-03, PNorm = 181.3297, GNorm = 0.2272, lr_0 = 2.2992e-04
Loss = 7.3342e-03, PNorm = 181.3322, GNorm = 0.2672, lr_0 = 2.2976e-04
Loss = 5.0620e-03, PNorm = 181.3405, GNorm = 0.1024, lr_0 = 2.2961e-04
Loss = 6.4382e-03, PNorm = 181.3487, GNorm = 0.1713, lr_0 = 2.2945e-04
Loss = 6.5196e-03, PNorm = 181.3567, GNorm = 0.1153, lr_0 = 2.2929e-04
Loss = 5.2188e-03, PNorm = 181.3649, GNorm = 0.2448, lr_0 = 2.2913e-04
Loss = 1.2427e-02, PNorm = 181.3698, GNorm = 0.3523, lr_0 = 2.2898e-04
Loss = 1.6477e-02, PNorm = 181.3761, GNorm = 0.2095, lr_0 = 2.2882e-04
Loss = 8.1384e-03, PNorm = 181.3802, GNorm = 0.1143, lr_0 = 2.2866e-04
Loss = 9.2876e-03, PNorm = 181.3869, GNorm = 0.1372, lr_0 = 2.2851e-04
Loss = 6.5705e-03, PNorm = 181.3952, GNorm = 0.1219, lr_0 = 2.2835e-04
Loss = 4.9001e-03, PNorm = 181.4015, GNorm = 0.2425, lr_0 = 2.2819e-04
Loss = 5.2866e-03, PNorm = 181.4090, GNorm = 0.1360, lr_0 = 2.2804e-04
Loss = 5.9240e-03, PNorm = 181.4160, GNorm = 0.2089, lr_0 = 2.2788e-04
Loss = 1.6544e-02, PNorm = 181.4221, GNorm = 0.1880, lr_0 = 2.2773e-04
Loss = 5.2624e-03, PNorm = 181.4277, GNorm = 0.1466, lr_0 = 2.2757e-04
Validation mae = 0.120870
Epoch 20
Loss = 1.1184e-02, PNorm = 181.4337, GNorm = 0.1879, lr_0 = 2.2741e-04
Loss = 5.5841e-03, PNorm = 181.4387, GNorm = 0.2667, lr_0 = 2.2726e-04
Loss = 5.3376e-03, PNorm = 181.4448, GNorm = 0.1095, lr_0 = 2.2710e-04
Loss = 6.9398e-03, PNorm = 181.4498, GNorm = 0.1229, lr_0 = 2.2695e-04
Loss = 8.4408e-03, PNorm = 181.4530, GNorm = 0.2694, lr_0 = 2.2679e-04
Loss = 4.8681e-03, PNorm = 181.4572, GNorm = 0.1142, lr_0 = 2.2664e-04
Loss = 4.6118e-03, PNorm = 181.4620, GNorm = 0.1006, lr_0 = 2.2648e-04
Loss = 1.0277e-02, PNorm = 181.4680, GNorm = 0.1731, lr_0 = 2.2632e-04
Loss = 4.9589e-03, PNorm = 181.4731, GNorm = 0.0949, lr_0 = 2.2617e-04
Loss = 6.0336e-03, PNorm = 181.4781, GNorm = 0.5684, lr_0 = 2.2601e-04
Loss = 6.6034e-03, PNorm = 181.4836, GNorm = 0.1394, lr_0 = 2.2586e-04
Loss = 5.9181e-03, PNorm = 181.4891, GNorm = 0.2590, lr_0 = 2.2571e-04
Loss = 5.7545e-03, PNorm = 181.4973, GNorm = 0.0788, lr_0 = 2.2555e-04
Loss = 6.3712e-03, PNorm = 181.5038, GNorm = 0.2485, lr_0 = 2.2540e-04
Loss = 9.5809e-03, PNorm = 181.5089, GNorm = 0.1828, lr_0 = 2.2524e-04
Loss = 4.4400e-03, PNorm = 181.5146, GNorm = 0.2144, lr_0 = 2.2509e-04
Loss = 5.3456e-03, PNorm = 181.5176, GNorm = 0.0912, lr_0 = 2.2493e-04
Loss = 7.8345e-03, PNorm = 181.5224, GNorm = 0.1599, lr_0 = 2.2478e-04
Loss = 4.6445e-03, PNorm = 181.5270, GNorm = 0.1370, lr_0 = 2.2463e-04
Loss = 5.7326e-03, PNorm = 181.5302, GNorm = 0.1675, lr_0 = 2.2447e-04
Loss = 5.6819e-03, PNorm = 181.5349, GNorm = 0.1343, lr_0 = 2.2432e-04
Loss = 4.4769e-03, PNorm = 181.5413, GNorm = 0.1178, lr_0 = 2.2416e-04
Loss = 3.9163e-03, PNorm = 181.5462, GNorm = 0.1126, lr_0 = 2.2401e-04
Loss = 4.1442e-03, PNorm = 181.5514, GNorm = 0.1482, lr_0 = 2.2386e-04
Loss = 7.1975e-03, PNorm = 181.5552, GNorm = 0.1115, lr_0 = 2.2370e-04
Loss = 7.5007e-03, PNorm = 181.5584, GNorm = 0.1544, lr_0 = 2.2355e-04
Loss = 4.2958e-03, PNorm = 181.5626, GNorm = 0.1677, lr_0 = 2.2340e-04
Loss = 8.5362e-03, PNorm = 181.5668, GNorm = 0.1383, lr_0 = 2.2324e-04
Loss = 7.7647e-03, PNorm = 181.5731, GNorm = 0.1656, lr_0 = 2.2309e-04
Loss = 5.0375e-03, PNorm = 181.5809, GNorm = 0.2113, lr_0 = 2.2294e-04
Loss = 6.6802e-03, PNorm = 181.5854, GNorm = 0.2020, lr_0 = 2.2279e-04
Loss = 4.4440e-03, PNorm = 181.5887, GNorm = 0.2196, lr_0 = 2.2263e-04
Loss = 5.9720e-03, PNorm = 181.5918, GNorm = 0.2374, lr_0 = 2.2248e-04
Loss = 5.0085e-03, PNorm = 181.5950, GNorm = 0.1757, lr_0 = 2.2233e-04
Loss = 1.2792e-02, PNorm = 181.6014, GNorm = 0.1772, lr_0 = 2.2218e-04
Loss = 4.5995e-03, PNorm = 181.6077, GNorm = 0.1012, lr_0 = 2.2202e-04
Loss = 7.1560e-03, PNorm = 181.6121, GNorm = 0.1437, lr_0 = 2.2187e-04
Loss = 8.5066e-03, PNorm = 181.6186, GNorm = 0.1208, lr_0 = 2.2172e-04
Loss = 9.8591e-03, PNorm = 181.6249, GNorm = 0.1288, lr_0 = 2.2157e-04
Loss = 5.0557e-03, PNorm = 181.6281, GNorm = 0.1165, lr_0 = 2.2142e-04
Loss = 8.2089e-03, PNorm = 181.6328, GNorm = 0.1009, lr_0 = 2.2126e-04
Loss = 4.4549e-03, PNorm = 181.6382, GNorm = 0.1979, lr_0 = 2.2111e-04
Loss = 4.1239e-03, PNorm = 181.6445, GNorm = 0.2525, lr_0 = 2.2096e-04
Loss = 7.3787e-03, PNorm = 181.6483, GNorm = 0.1764, lr_0 = 2.2081e-04
Loss = 6.5239e-03, PNorm = 181.6542, GNorm = 0.1023, lr_0 = 2.2066e-04
Loss = 1.1696e-02, PNorm = 181.6596, GNorm = 0.0934, lr_0 = 2.2051e-04
Loss = 4.8221e-03, PNorm = 181.6661, GNorm = 0.1263, lr_0 = 2.2036e-04
Loss = 5.9159e-03, PNorm = 181.6730, GNorm = 0.2156, lr_0 = 2.2021e-04
Loss = 6.4905e-03, PNorm = 181.6790, GNorm = 0.3470, lr_0 = 2.2005e-04
Loss = 5.3220e-03, PNorm = 181.6862, GNorm = 0.1569, lr_0 = 2.1990e-04
Loss = 6.2882e-03, PNorm = 181.6910, GNorm = 0.1600, lr_0 = 2.1975e-04
Loss = 4.8297e-03, PNorm = 181.6954, GNorm = 0.2724, lr_0 = 2.1960e-04
Loss = 4.7029e-03, PNorm = 181.7019, GNorm = 0.1247, lr_0 = 2.1945e-04
Loss = 4.2013e-03, PNorm = 181.7078, GNorm = 0.1973, lr_0 = 2.1930e-04
Loss = 9.1889e-03, PNorm = 181.7146, GNorm = 0.2580, lr_0 = 2.1915e-04
Loss = 5.3193e-03, PNorm = 181.7203, GNorm = 0.2076, lr_0 = 2.1900e-04
Loss = 4.6681e-03, PNorm = 181.7273, GNorm = 0.4074, lr_0 = 2.1885e-04
Loss = 4.0272e-03, PNorm = 181.7324, GNorm = 0.0790, lr_0 = 2.1870e-04
Loss = 5.2303e-03, PNorm = 181.7373, GNorm = 0.2136, lr_0 = 2.1855e-04
Loss = 5.6495e-03, PNorm = 181.7410, GNorm = 0.2378, lr_0 = 2.1840e-04
Loss = 5.8177e-03, PNorm = 181.7452, GNorm = 0.1583, lr_0 = 2.1825e-04
Loss = 4.7032e-03, PNorm = 181.7506, GNorm = 0.2001, lr_0 = 2.1810e-04
Loss = 4.6960e-03, PNorm = 181.7563, GNorm = 0.1398, lr_0 = 2.1795e-04
Loss = 1.8503e-02, PNorm = 181.7608, GNorm = 0.1939, lr_0 = 2.1780e-04
Loss = 5.7491e-03, PNorm = 181.7638, GNorm = 0.1612, lr_0 = 2.1765e-04
Loss = 4.6009e-03, PNorm = 181.7696, GNorm = 0.1262, lr_0 = 2.1751e-04
Loss = 8.3948e-03, PNorm = 181.7745, GNorm = 0.4613, lr_0 = 2.1736e-04
Loss = 6.9389e-03, PNorm = 181.7796, GNorm = 0.2692, lr_0 = 2.1721e-04
Loss = 5.5342e-03, PNorm = 181.7840, GNorm = 0.0788, lr_0 = 2.1706e-04
Loss = 3.8549e-03, PNorm = 181.7887, GNorm = 0.1029, lr_0 = 2.1691e-04
Loss = 5.3259e-03, PNorm = 181.7922, GNorm = 0.1324, lr_0 = 2.1676e-04
Loss = 4.2315e-03, PNorm = 181.7975, GNorm = 0.1252, lr_0 = 2.1661e-04
Loss = 4.5910e-03, PNorm = 181.8034, GNorm = 0.1473, lr_0 = 2.1646e-04
Loss = 5.4335e-03, PNorm = 181.8098, GNorm = 0.2434, lr_0 = 2.1632e-04
Loss = 5.2122e-03, PNorm = 181.8144, GNorm = 0.1334, lr_0 = 2.1617e-04
Loss = 4.2219e-03, PNorm = 181.8173, GNorm = 0.1940, lr_0 = 2.1602e-04
Loss = 7.8233e-03, PNorm = 181.8219, GNorm = 0.1343, lr_0 = 2.1587e-04
Loss = 1.0494e-02, PNorm = 181.8244, GNorm = 0.1039, lr_0 = 2.1572e-04
Loss = 4.2809e-03, PNorm = 181.8309, GNorm = 0.0990, lr_0 = 2.1558e-04
Loss = 5.7190e-03, PNorm = 181.8373, GNorm = 0.1406, lr_0 = 2.1543e-04
Loss = 4.3273e-03, PNorm = 181.8414, GNorm = 0.1583, lr_0 = 2.1528e-04
Loss = 8.2172e-03, PNorm = 181.8485, GNorm = 0.1518, lr_0 = 2.1513e-04
Loss = 4.3701e-03, PNorm = 181.8567, GNorm = 0.2831, lr_0 = 2.1499e-04
Loss = 7.1170e-03, PNorm = 181.8603, GNorm = 0.1445, lr_0 = 2.1484e-04
Loss = 5.5414e-03, PNorm = 181.8654, GNorm = 0.1389, lr_0 = 2.1469e-04
Loss = 4.7936e-03, PNorm = 181.8686, GNorm = 0.1253, lr_0 = 2.1454e-04
Loss = 3.7260e-03, PNorm = 181.8731, GNorm = 0.0909, lr_0 = 2.1440e-04
Loss = 1.0211e-02, PNorm = 181.8774, GNorm = 0.2287, lr_0 = 2.1425e-04
Loss = 1.2806e-02, PNorm = 181.8847, GNorm = 0.1330, lr_0 = 2.1410e-04
Loss = 6.5997e-03, PNorm = 181.8925, GNorm = 0.1865, lr_0 = 2.1396e-04
Loss = 6.7054e-03, PNorm = 181.8982, GNorm = 0.1910, lr_0 = 2.1381e-04
Loss = 3.7697e-03, PNorm = 181.9046, GNorm = 0.2014, lr_0 = 2.1366e-04
Loss = 4.5977e-03, PNorm = 181.9096, GNorm = 0.2489, lr_0 = 2.1352e-04
Loss = 7.8554e-03, PNorm = 181.9139, GNorm = 0.1418, lr_0 = 2.1337e-04
Loss = 1.0024e-02, PNorm = 181.9203, GNorm = 0.1623, lr_0 = 2.1323e-04
Loss = 6.3468e-03, PNorm = 181.9286, GNorm = 0.2596, lr_0 = 2.1308e-04
Loss = 9.0183e-03, PNorm = 181.9356, GNorm = 0.1555, lr_0 = 2.1293e-04
Loss = 4.4848e-03, PNorm = 181.9423, GNorm = 0.0998, lr_0 = 2.1279e-04
Loss = 7.7914e-03, PNorm = 181.9491, GNorm = 0.4701, lr_0 = 2.1264e-04
Loss = 7.0798e-03, PNorm = 181.9586, GNorm = 0.1691, lr_0 = 2.1250e-04
Loss = 8.0867e-03, PNorm = 181.9658, GNorm = 0.2255, lr_0 = 2.1235e-04
Loss = 4.3983e-03, PNorm = 181.9728, GNorm = 0.1554, lr_0 = 2.1221e-04
Loss = 8.0663e-03, PNorm = 181.9781, GNorm = 0.1125, lr_0 = 2.1206e-04
Loss = 4.4579e-03, PNorm = 181.9835, GNorm = 0.2000, lr_0 = 2.1191e-04
Loss = 5.3781e-03, PNorm = 181.9895, GNorm = 0.1379, lr_0 = 2.1177e-04
Loss = 6.4657e-03, PNorm = 181.9946, GNorm = 0.1530, lr_0 = 2.1162e-04
Loss = 4.9565e-03, PNorm = 181.9990, GNorm = 0.2209, lr_0 = 2.1148e-04
Loss = 7.7057e-03, PNorm = 182.0041, GNorm = 0.1875, lr_0 = 2.1133e-04
Loss = 6.4618e-03, PNorm = 182.0079, GNorm = 0.1032, lr_0 = 2.1119e-04
Loss = 6.6212e-03, PNorm = 182.0137, GNorm = 0.1535, lr_0 = 2.1104e-04
Loss = 5.1223e-03, PNorm = 182.0188, GNorm = 0.2309, lr_0 = 2.1090e-04
Loss = 1.0393e-02, PNorm = 182.0245, GNorm = 0.3066, lr_0 = 2.1076e-04
Loss = 3.6630e-03, PNorm = 182.0308, GNorm = 0.1524, lr_0 = 2.1061e-04
Loss = 6.9674e-03, PNorm = 182.0385, GNorm = 0.0840, lr_0 = 2.1047e-04
Loss = 5.2404e-03, PNorm = 182.0460, GNorm = 0.2163, lr_0 = 2.1032e-04
Loss = 8.9444e-03, PNorm = 182.0521, GNorm = 0.1731, lr_0 = 2.1018e-04
Loss = 6.2792e-03, PNorm = 182.0567, GNorm = 0.0985, lr_0 = 2.1003e-04
Loss = 4.8009e-03, PNorm = 182.0616, GNorm = 0.2944, lr_0 = 2.0989e-04
Loss = 4.3635e-03, PNorm = 182.0664, GNorm = 0.1310, lr_0 = 2.0975e-04
Loss = 4.7539e-03, PNorm = 182.0691, GNorm = 0.0866, lr_0 = 2.0960e-04
Validation mae = 0.120776
Epoch 21
Loss = 6.7097e-03, PNorm = 182.0724, GNorm = 0.1802, lr_0 = 2.0946e-04
Loss = 3.6930e-03, PNorm = 182.0789, GNorm = 0.0811, lr_0 = 2.0932e-04
Loss = 7.9908e-03, PNorm = 182.0818, GNorm = 0.0858, lr_0 = 2.0917e-04
Loss = 3.6981e-03, PNorm = 182.0836, GNorm = 0.1415, lr_0 = 2.0903e-04
Loss = 8.6904e-03, PNorm = 182.0865, GNorm = 0.0724, lr_0 = 2.0889e-04
Loss = 7.1670e-03, PNorm = 182.0888, GNorm = 0.2616, lr_0 = 2.0874e-04
Loss = 4.4068e-03, PNorm = 182.0918, GNorm = 0.4350, lr_0 = 2.0860e-04
Loss = 6.1157e-03, PNorm = 182.0949, GNorm = 0.1505, lr_0 = 2.0846e-04
Loss = 4.0874e-03, PNorm = 182.1020, GNorm = 0.5531, lr_0 = 2.0831e-04
Loss = 3.7182e-03, PNorm = 182.1042, GNorm = 0.0999, lr_0 = 2.0817e-04
Loss = 4.8663e-03, PNorm = 182.1080, GNorm = 0.2505, lr_0 = 2.0803e-04
Loss = 7.3285e-03, PNorm = 182.1141, GNorm = 0.2923, lr_0 = 2.0789e-04
Loss = 4.2633e-03, PNorm = 182.1187, GNorm = 0.2754, lr_0 = 2.0774e-04
Loss = 5.0981e-03, PNorm = 182.1230, GNorm = 0.1231, lr_0 = 2.0760e-04
Loss = 7.3231e-03, PNorm = 182.1269, GNorm = 0.1264, lr_0 = 2.0746e-04
Loss = 6.3416e-03, PNorm = 182.1305, GNorm = 0.1725, lr_0 = 2.0732e-04
Loss = 4.2475e-03, PNorm = 182.1346, GNorm = 0.1008, lr_0 = 2.0718e-04
Loss = 4.9349e-03, PNorm = 182.1387, GNorm = 0.2512, lr_0 = 2.0703e-04
Loss = 3.6431e-03, PNorm = 182.1445, GNorm = 0.0694, lr_0 = 2.0689e-04
Loss = 5.1962e-03, PNorm = 182.1489, GNorm = 0.1515, lr_0 = 2.0675e-04
Loss = 4.8915e-03, PNorm = 182.1511, GNorm = 1.0712, lr_0 = 2.0661e-04
Loss = 3.6676e-03, PNorm = 182.1535, GNorm = 0.1040, lr_0 = 2.0647e-04
Loss = 4.0642e-03, PNorm = 182.1571, GNorm = 0.2149, lr_0 = 2.0633e-04
Loss = 3.8110e-03, PNorm = 182.1598, GNorm = 0.1852, lr_0 = 2.0618e-04
Loss = 5.6456e-03, PNorm = 182.1625, GNorm = 0.0830, lr_0 = 2.0604e-04
Loss = 5.2650e-03, PNorm = 182.1648, GNorm = 0.4093, lr_0 = 2.0590e-04
Loss = 5.7503e-03, PNorm = 182.1707, GNorm = 0.1558, lr_0 = 2.0576e-04
Loss = 4.7121e-03, PNorm = 182.1784, GNorm = 0.0658, lr_0 = 2.0562e-04
Loss = 4.9977e-03, PNorm = 182.1855, GNorm = 0.1449, lr_0 = 2.0548e-04
Loss = 5.6804e-03, PNorm = 182.1929, GNorm = 0.2028, lr_0 = 2.0534e-04
Loss = 3.4496e-03, PNorm = 182.1981, GNorm = 0.1484, lr_0 = 2.0520e-04
Loss = 3.9227e-03, PNorm = 182.2027, GNorm = 0.0880, lr_0 = 2.0506e-04
Loss = 3.7389e-03, PNorm = 182.2088, GNorm = 0.1222, lr_0 = 2.0492e-04
Loss = 4.5426e-03, PNorm = 182.2143, GNorm = 0.1558, lr_0 = 2.0478e-04
Loss = 7.2945e-03, PNorm = 182.2197, GNorm = 0.6353, lr_0 = 2.0464e-04
Loss = 4.0336e-03, PNorm = 182.2271, GNorm = 0.0768, lr_0 = 2.0450e-04
Loss = 6.8091e-03, PNorm = 182.2339, GNorm = 0.1156, lr_0 = 2.0436e-04
Loss = 5.4317e-03, PNorm = 182.2366, GNorm = 0.0738, lr_0 = 2.0422e-04
Loss = 1.0319e-02, PNorm = 182.2406, GNorm = 0.1550, lr_0 = 2.0408e-04
Loss = 5.2287e-03, PNorm = 182.2467, GNorm = 0.2023, lr_0 = 2.0394e-04
Loss = 3.2816e-03, PNorm = 182.2519, GNorm = 0.1005, lr_0 = 2.0380e-04
Loss = 8.3051e-03, PNorm = 182.2570, GNorm = 0.0955, lr_0 = 2.0366e-04
Loss = 5.2186e-03, PNorm = 182.2628, GNorm = 0.6393, lr_0 = 2.0352e-04
Loss = 7.6338e-03, PNorm = 182.2667, GNorm = 0.1469, lr_0 = 2.0338e-04
Loss = 2.9911e-03, PNorm = 182.2714, GNorm = 0.0740, lr_0 = 2.0324e-04
Loss = 4.3234e-03, PNorm = 182.2747, GNorm = 0.1261, lr_0 = 2.0310e-04
Loss = 6.6630e-03, PNorm = 182.2767, GNorm = 0.0697, lr_0 = 2.0296e-04
Loss = 2.7197e-03, PNorm = 182.2793, GNorm = 0.4198, lr_0 = 2.0282e-04
Loss = 4.0970e-03, PNorm = 182.2835, GNorm = 0.1746, lr_0 = 2.0268e-04
Loss = 6.8152e-03, PNorm = 182.2870, GNorm = 0.0959, lr_0 = 2.0254e-04
Loss = 9.2544e-03, PNorm = 182.2922, GNorm = 0.3770, lr_0 = 2.0240e-04
Loss = 5.1165e-03, PNorm = 182.2961, GNorm = 0.0768, lr_0 = 2.0227e-04
Loss = 6.3768e-03, PNorm = 182.2994, GNorm = 0.1024, lr_0 = 2.0213e-04
Loss = 3.9605e-03, PNorm = 182.3049, GNorm = 0.1021, lr_0 = 2.0199e-04
Loss = 3.9096e-03, PNorm = 182.3132, GNorm = 0.0874, lr_0 = 2.0185e-04
Loss = 5.4296e-03, PNorm = 182.3172, GNorm = 0.4419, lr_0 = 2.0171e-04
Loss = 4.4817e-03, PNorm = 182.3205, GNorm = 0.2040, lr_0 = 2.0157e-04
Loss = 3.4527e-03, PNorm = 182.3264, GNorm = 0.1215, lr_0 = 2.0144e-04
Loss = 4.2968e-03, PNorm = 182.3312, GNorm = 0.1828, lr_0 = 2.0130e-04
Loss = 3.5277e-03, PNorm = 182.3343, GNorm = 0.1667, lr_0 = 2.0116e-04
Loss = 3.1924e-03, PNorm = 182.3374, GNorm = 0.2476, lr_0 = 2.0102e-04
Loss = 3.6122e-03, PNorm = 182.3416, GNorm = 0.3024, lr_0 = 2.0088e-04
Loss = 3.6965e-03, PNorm = 182.3454, GNorm = 0.1018, lr_0 = 2.0075e-04
Loss = 3.8121e-03, PNorm = 182.3500, GNorm = 0.0999, lr_0 = 2.0061e-04
Loss = 8.0290e-03, PNorm = 182.3537, GNorm = 0.1911, lr_0 = 2.0047e-04
Loss = 3.6909e-03, PNorm = 182.3575, GNorm = 0.2834, lr_0 = 2.0033e-04
Loss = 3.5389e-03, PNorm = 182.3604, GNorm = 0.2425, lr_0 = 2.0020e-04
Loss = 5.5540e-03, PNorm = 182.3651, GNorm = 0.1549, lr_0 = 2.0006e-04
Loss = 3.3704e-03, PNorm = 182.3700, GNorm = 0.1509, lr_0 = 1.9992e-04
Loss = 4.9345e-03, PNorm = 182.3747, GNorm = 0.1081, lr_0 = 1.9979e-04
Loss = 5.7972e-03, PNorm = 182.3786, GNorm = 0.1059, lr_0 = 1.9965e-04
Loss = 3.7047e-03, PNorm = 182.3822, GNorm = 0.1296, lr_0 = 1.9951e-04
Loss = 5.5603e-03, PNorm = 182.3848, GNorm = 0.2050, lr_0 = 1.9938e-04
Loss = 8.8083e-03, PNorm = 182.3897, GNorm = 0.1380, lr_0 = 1.9924e-04
Loss = 4.6567e-03, PNorm = 182.3954, GNorm = 0.1454, lr_0 = 1.9910e-04
Loss = 3.3055e-03, PNorm = 182.4007, GNorm = 0.1054, lr_0 = 1.9897e-04
Loss = 6.9189e-03, PNorm = 182.4051, GNorm = 0.3682, lr_0 = 1.9883e-04
Loss = 3.2212e-03, PNorm = 182.4087, GNorm = 0.0987, lr_0 = 1.9869e-04
Loss = 1.0124e-02, PNorm = 182.4124, GNorm = 0.3934, lr_0 = 1.9856e-04
Loss = 7.5345e-03, PNorm = 182.4181, GNorm = 0.2003, lr_0 = 1.9842e-04
Loss = 7.6315e-03, PNorm = 182.4246, GNorm = 0.0992, lr_0 = 1.9829e-04
Loss = 7.0813e-03, PNorm = 182.4294, GNorm = 0.1112, lr_0 = 1.9815e-04
Loss = 9.2114e-03, PNorm = 182.4340, GNorm = 0.0881, lr_0 = 1.9801e-04
Loss = 5.8972e-03, PNorm = 182.4382, GNorm = 0.1402, lr_0 = 1.9788e-04
Loss = 6.5701e-03, PNorm = 182.4423, GNorm = 0.1845, lr_0 = 1.9774e-04
Loss = 1.0518e-02, PNorm = 182.4464, GNorm = 0.0872, lr_0 = 1.9761e-04
Loss = 4.9119e-03, PNorm = 182.4514, GNorm = 0.3648, lr_0 = 1.9747e-04
Loss = 5.4274e-03, PNorm = 182.4560, GNorm = 0.1938, lr_0 = 1.9734e-04
Loss = 6.4108e-03, PNorm = 182.4612, GNorm = 0.2143, lr_0 = 1.9720e-04
Loss = 7.4999e-03, PNorm = 182.4644, GNorm = 0.1547, lr_0 = 1.9707e-04
Loss = 4.3150e-03, PNorm = 182.4684, GNorm = 0.2371, lr_0 = 1.9693e-04
Loss = 8.0559e-03, PNorm = 182.4726, GNorm = 0.0831, lr_0 = 1.9680e-04
Loss = 4.4614e-03, PNorm = 182.4760, GNorm = 0.1398, lr_0 = 1.9666e-04
Loss = 5.5802e-03, PNorm = 182.4803, GNorm = 0.1284, lr_0 = 1.9653e-04
Loss = 4.9006e-03, PNorm = 182.4850, GNorm = 0.1125, lr_0 = 1.9639e-04
Loss = 6.7306e-03, PNorm = 182.4884, GNorm = 0.0985, lr_0 = 1.9626e-04
Loss = 2.0067e-02, PNorm = 182.4951, GNorm = 0.2460, lr_0 = 1.9612e-04
Loss = 4.6445e-03, PNorm = 182.4996, GNorm = 0.2504, lr_0 = 1.9599e-04
Loss = 3.7132e-03, PNorm = 182.5039, GNorm = 0.0746, lr_0 = 1.9585e-04
Loss = 4.5227e-03, PNorm = 182.5106, GNorm = 0.1417, lr_0 = 1.9572e-04
Loss = 5.4423e-03, PNorm = 182.5173, GNorm = 0.4366, lr_0 = 1.9559e-04
Loss = 3.8357e-03, PNorm = 182.5225, GNorm = 0.1480, lr_0 = 1.9545e-04
Loss = 3.8598e-03, PNorm = 182.5260, GNorm = 0.1456, lr_0 = 1.9532e-04
Loss = 3.0806e-03, PNorm = 182.5305, GNorm = 0.1075, lr_0 = 1.9518e-04
Loss = 4.8899e-03, PNorm = 182.5346, GNorm = 0.1864, lr_0 = 1.9505e-04
Loss = 4.6461e-03, PNorm = 182.5400, GNorm = 0.1324, lr_0 = 1.9492e-04
Loss = 1.1313e-02, PNorm = 182.5451, GNorm = 0.2168, lr_0 = 1.9478e-04
Loss = 6.6401e-03, PNorm = 182.5512, GNorm = 0.1155, lr_0 = 1.9465e-04
Loss = 6.6945e-03, PNorm = 182.5568, GNorm = 0.1546, lr_0 = 1.9452e-04
Loss = 3.0788e-03, PNorm = 182.5610, GNorm = 0.1784, lr_0 = 1.9438e-04
Loss = 4.1362e-03, PNorm = 182.5649, GNorm = 0.1465, lr_0 = 1.9425e-04
Loss = 1.4762e-02, PNorm = 182.5683, GNorm = 0.2603, lr_0 = 1.9412e-04
Loss = 8.5424e-03, PNorm = 182.5740, GNorm = 0.3100, lr_0 = 1.9398e-04
Loss = 9.7895e-03, PNorm = 182.5781, GNorm = 0.1663, lr_0 = 1.9385e-04
Loss = 5.5944e-03, PNorm = 182.5825, GNorm = 0.1830, lr_0 = 1.9372e-04
Loss = 9.6791e-03, PNorm = 182.5866, GNorm = 0.1520, lr_0 = 1.9359e-04
Loss = 4.9847e-03, PNorm = 182.5933, GNorm = 0.1767, lr_0 = 1.9345e-04
Loss = 5.5004e-03, PNorm = 182.5999, GNorm = 0.1099, lr_0 = 1.9332e-04
Loss = 3.4633e-03, PNorm = 182.6065, GNorm = 0.0868, lr_0 = 1.9319e-04
Loss = 5.5977e-03, PNorm = 182.6115, GNorm = 0.1204, lr_0 = 1.9306e-04
Validation mae = 0.120664
Epoch 22
Loss = 4.1914e-03, PNorm = 182.6182, GNorm = 0.1341, lr_0 = 1.9292e-04
Loss = 4.5009e-03, PNorm = 182.6218, GNorm = 0.1152, lr_0 = 1.9279e-04
Loss = 4.0218e-03, PNorm = 182.6266, GNorm = 0.2274, lr_0 = 1.9266e-04
Loss = 6.0581e-03, PNorm = 182.6287, GNorm = 0.1580, lr_0 = 1.9253e-04
Loss = 7.8003e-03, PNorm = 182.6308, GNorm = 0.1130, lr_0 = 1.9240e-04
Loss = 5.1860e-03, PNorm = 182.6335, GNorm = 0.0731, lr_0 = 1.9226e-04
Loss = 2.9036e-03, PNorm = 182.6369, GNorm = 0.1136, lr_0 = 1.9213e-04
Loss = 3.3887e-03, PNorm = 182.6397, GNorm = 0.2887, lr_0 = 1.9200e-04
Loss = 8.6188e-03, PNorm = 182.6421, GNorm = 0.1795, lr_0 = 1.9187e-04
Loss = 1.0436e-02, PNorm = 182.6466, GNorm = 0.3382, lr_0 = 1.9174e-04
Loss = 4.6700e-03, PNorm = 182.6516, GNorm = 0.1656, lr_0 = 1.9161e-04
Loss = 5.9169e-03, PNorm = 182.6536, GNorm = 0.1476, lr_0 = 1.9148e-04
Loss = 5.8748e-03, PNorm = 182.6546, GNorm = 0.0746, lr_0 = 1.9134e-04
Loss = 5.0133e-03, PNorm = 182.6579, GNorm = 0.1750, lr_0 = 1.9121e-04
Loss = 4.0938e-03, PNorm = 182.6624, GNorm = 0.1732, lr_0 = 1.9108e-04
Loss = 3.1350e-03, PNorm = 182.6675, GNorm = 0.2625, lr_0 = 1.9095e-04
Loss = 5.4151e-03, PNorm = 182.6705, GNorm = 0.1075, lr_0 = 1.9082e-04
Loss = 6.5646e-03, PNorm = 182.6735, GNorm = 0.2089, lr_0 = 1.9069e-04
Loss = 4.4160e-03, PNorm = 182.6772, GNorm = 0.1992, lr_0 = 1.9056e-04
Loss = 6.0713e-03, PNorm = 182.6793, GNorm = 0.1535, lr_0 = 1.9043e-04
Loss = 3.6154e-03, PNorm = 182.6845, GNorm = 0.1265, lr_0 = 1.9030e-04
Loss = 4.3703e-03, PNorm = 182.6908, GNorm = 0.1256, lr_0 = 1.9017e-04
Loss = 3.2090e-03, PNorm = 182.6958, GNorm = 0.0863, lr_0 = 1.9004e-04
Loss = 4.0390e-03, PNorm = 182.7005, GNorm = 0.1244, lr_0 = 1.8991e-04
Loss = 4.4066e-03, PNorm = 182.7054, GNorm = 0.0958, lr_0 = 1.8978e-04
Loss = 2.9406e-03, PNorm = 182.7100, GNorm = 0.1187, lr_0 = 1.8965e-04
Loss = 2.4095e-03, PNorm = 182.7147, GNorm = 0.0538, lr_0 = 1.8952e-04
Loss = 3.7256e-03, PNorm = 182.7183, GNorm = 0.2048, lr_0 = 1.8939e-04
Loss = 4.0592e-03, PNorm = 182.7215, GNorm = 0.1271, lr_0 = 1.8926e-04
Loss = 3.9556e-03, PNorm = 182.7261, GNorm = 0.1066, lr_0 = 1.8913e-04
Loss = 3.1883e-03, PNorm = 182.7307, GNorm = 0.0940, lr_0 = 1.8900e-04
Loss = 4.8949e-03, PNorm = 182.7338, GNorm = 0.1680, lr_0 = 1.8887e-04
Loss = 3.8988e-03, PNorm = 182.7381, GNorm = 0.3869, lr_0 = 1.8874e-04
Loss = 4.8371e-03, PNorm = 182.7418, GNorm = 0.4851, lr_0 = 1.8861e-04
Loss = 2.7543e-03, PNorm = 182.7462, GNorm = 0.4292, lr_0 = 1.8848e-04
Loss = 3.1882e-03, PNorm = 182.7505, GNorm = 0.1469, lr_0 = 1.8835e-04
Loss = 4.4382e-03, PNorm = 182.7529, GNorm = 0.1775, lr_0 = 1.8822e-04
Loss = 6.6832e-03, PNorm = 182.7564, GNorm = 0.0946, lr_0 = 1.8809e-04
Loss = 6.1851e-03, PNorm = 182.7600, GNorm = 0.1746, lr_0 = 1.8797e-04
Loss = 5.5550e-03, PNorm = 182.7631, GNorm = 0.1303, lr_0 = 1.8784e-04
Loss = 2.9283e-03, PNorm = 182.7670, GNorm = 0.2803, lr_0 = 1.8771e-04
Loss = 2.7358e-03, PNorm = 182.7710, GNorm = 0.1570, lr_0 = 1.8758e-04
Loss = 2.6494e-03, PNorm = 182.7748, GNorm = 0.1397, lr_0 = 1.8745e-04
Loss = 5.0269e-03, PNorm = 182.7786, GNorm = 0.1634, lr_0 = 1.8732e-04
Loss = 3.0967e-03, PNorm = 182.7795, GNorm = 0.1087, lr_0 = 1.8719e-04
Loss = 4.5955e-03, PNorm = 182.7820, GNorm = 0.0663, lr_0 = 1.8707e-04
Loss = 3.2609e-03, PNorm = 182.7860, GNorm = 0.1472, lr_0 = 1.8694e-04
Loss = 7.6291e-03, PNorm = 182.7896, GNorm = 0.0988, lr_0 = 1.8681e-04
Loss = 3.4425e-03, PNorm = 182.7944, GNorm = 0.1813, lr_0 = 1.8668e-04
Loss = 5.7329e-03, PNorm = 182.7977, GNorm = 0.2131, lr_0 = 1.8655e-04
Loss = 3.9758e-03, PNorm = 182.8006, GNorm = 0.0880, lr_0 = 1.8643e-04
Loss = 5.1437e-03, PNorm = 182.8037, GNorm = 0.0942, lr_0 = 1.8630e-04
Loss = 2.8764e-03, PNorm = 182.8061, GNorm = 0.1108, lr_0 = 1.8617e-04
Loss = 9.2518e-03, PNorm = 182.8098, GNorm = 0.1070, lr_0 = 1.8604e-04
Loss = 4.7852e-03, PNorm = 182.8133, GNorm = 0.1129, lr_0 = 1.8592e-04
Loss = 8.1688e-03, PNorm = 182.8173, GNorm = 0.1412, lr_0 = 1.8579e-04
Loss = 6.8254e-03, PNorm = 182.8207, GNorm = 0.0528, lr_0 = 1.8566e-04
Loss = 8.2057e-03, PNorm = 182.8238, GNorm = 0.3379, lr_0 = 1.8553e-04
Loss = 3.3023e-03, PNorm = 182.8280, GNorm = 0.1471, lr_0 = 1.8541e-04
Loss = 4.5809e-03, PNorm = 182.8311, GNorm = 0.8043, lr_0 = 1.8528e-04
Loss = 5.9926e-03, PNorm = 182.8341, GNorm = 0.3729, lr_0 = 1.8515e-04
Loss = 2.9002e-03, PNorm = 182.8402, GNorm = 0.1937, lr_0 = 1.8503e-04
Loss = 3.3060e-03, PNorm = 182.8447, GNorm = 0.1023, lr_0 = 1.8490e-04
Loss = 3.5954e-03, PNorm = 182.8496, GNorm = 0.1860, lr_0 = 1.8477e-04
Loss = 4.1051e-03, PNorm = 182.8538, GNorm = 0.1211, lr_0 = 1.8465e-04
Loss = 9.1780e-03, PNorm = 182.8577, GNorm = 0.8253, lr_0 = 1.8452e-04
Loss = 7.1722e-03, PNorm = 182.8605, GNorm = 0.0891, lr_0 = 1.8439e-04
Loss = 5.6488e-03, PNorm = 182.8622, GNorm = 0.0732, lr_0 = 1.8427e-04
Loss = 3.5794e-03, PNorm = 182.8656, GNorm = 0.1433, lr_0 = 1.8414e-04
Loss = 8.3063e-03, PNorm = 182.8699, GNorm = 0.1749, lr_0 = 1.8401e-04
Loss = 3.2500e-03, PNorm = 182.8747, GNorm = 0.1006, lr_0 = 1.8389e-04
Loss = 4.0214e-03, PNorm = 182.8790, GNorm = 0.2012, lr_0 = 1.8376e-04
Loss = 6.2342e-03, PNorm = 182.8828, GNorm = 0.2131, lr_0 = 1.8364e-04
Loss = 3.4746e-03, PNorm = 182.8856, GNorm = 0.2592, lr_0 = 1.8351e-04
Loss = 3.7986e-03, PNorm = 182.8898, GNorm = 0.0663, lr_0 = 1.8338e-04
Loss = 6.9769e-03, PNorm = 182.8927, GNorm = 0.0655, lr_0 = 1.8326e-04
Loss = 3.5058e-03, PNorm = 182.8971, GNorm = 0.1290, lr_0 = 1.8313e-04
Loss = 8.2266e-03, PNorm = 182.9021, GNorm = 0.1755, lr_0 = 1.8301e-04
Loss = 7.3654e-03, PNorm = 182.9084, GNorm = 0.5518, lr_0 = 1.8288e-04
Loss = 1.6372e-02, PNorm = 182.9137, GNorm = 0.4197, lr_0 = 1.8276e-04
Loss = 3.2450e-03, PNorm = 182.9167, GNorm = 0.1282, lr_0 = 1.8263e-04
Loss = 4.2887e-03, PNorm = 182.9181, GNorm = 0.1913, lr_0 = 1.8251e-04
Loss = 6.2429e-03, PNorm = 182.9216, GNorm = 0.2150, lr_0 = 1.8238e-04
Loss = 2.6039e-03, PNorm = 182.9264, GNorm = 0.2948, lr_0 = 1.8226e-04
Loss = 8.2532e-03, PNorm = 182.9291, GNorm = 0.9582, lr_0 = 1.8213e-04
Loss = 3.3616e-03, PNorm = 182.9309, GNorm = 0.2592, lr_0 = 1.8201e-04
Loss = 2.5944e-03, PNorm = 182.9339, GNorm = 0.1641, lr_0 = 1.8188e-04
Loss = 4.2325e-03, PNorm = 182.9387, GNorm = 0.0784, lr_0 = 1.8176e-04
Loss = 9.1979e-03, PNorm = 182.9431, GNorm = 0.1738, lr_0 = 1.8163e-04
Loss = 4.5527e-03, PNorm = 182.9477, GNorm = 0.1975, lr_0 = 1.8151e-04
Loss = 3.2301e-03, PNorm = 182.9535, GNorm = 0.2579, lr_0 = 1.8138e-04
Loss = 5.9398e-03, PNorm = 182.9587, GNorm = 0.1403, lr_0 = 1.8126e-04
Loss = 4.6768e-03, PNorm = 182.9633, GNorm = 0.0877, lr_0 = 1.8114e-04
Loss = 5.3285e-03, PNorm = 182.9674, GNorm = 0.1992, lr_0 = 1.8101e-04
Loss = 5.5565e-03, PNorm = 182.9699, GNorm = 0.1296, lr_0 = 1.8089e-04
Loss = 5.3245e-03, PNorm = 182.9748, GNorm = 0.1567, lr_0 = 1.8076e-04
Loss = 7.4913e-03, PNorm = 182.9793, GNorm = 0.0908, lr_0 = 1.8064e-04
Loss = 4.6060e-03, PNorm = 182.9839, GNorm = 0.0976, lr_0 = 1.8052e-04
Loss = 4.3586e-03, PNorm = 182.9873, GNorm = 0.0800, lr_0 = 1.8039e-04
Loss = 3.0817e-03, PNorm = 182.9901, GNorm = 0.1628, lr_0 = 1.8027e-04
Loss = 5.8602e-03, PNorm = 182.9937, GNorm = 0.1370, lr_0 = 1.8015e-04
Loss = 2.5101e-03, PNorm = 182.9987, GNorm = 0.2152, lr_0 = 1.8002e-04
Loss = 5.1261e-03, PNorm = 183.0028, GNorm = 0.0869, lr_0 = 1.7990e-04
Loss = 3.9512e-03, PNorm = 183.0056, GNorm = 0.1182, lr_0 = 1.7978e-04
Loss = 6.2206e-03, PNorm = 183.0084, GNorm = 0.1122, lr_0 = 1.7965e-04
Loss = 6.4759e-03, PNorm = 183.0129, GNorm = 0.1483, lr_0 = 1.7953e-04
Loss = 4.1079e-03, PNorm = 183.0168, GNorm = 0.2047, lr_0 = 1.7941e-04
Loss = 9.1402e-03, PNorm = 183.0205, GNorm = 0.4402, lr_0 = 1.7928e-04
Loss = 1.1421e-02, PNorm = 183.0246, GNorm = 0.1095, lr_0 = 1.7916e-04
Loss = 3.6921e-03, PNorm = 183.0286, GNorm = 0.0900, lr_0 = 1.7904e-04
Loss = 5.4310e-03, PNorm = 183.0329, GNorm = 0.0896, lr_0 = 1.7892e-04
Loss = 6.7155e-03, PNorm = 183.0397, GNorm = 0.4682, lr_0 = 1.7879e-04
Loss = 7.2697e-03, PNorm = 183.0454, GNorm = 0.2350, lr_0 = 1.7867e-04
Loss = 3.2247e-03, PNorm = 183.0477, GNorm = 0.1536, lr_0 = 1.7855e-04
Loss = 4.2472e-03, PNorm = 183.0532, GNorm = 0.0779, lr_0 = 1.7843e-04
Loss = 4.8043e-03, PNorm = 183.0573, GNorm = 0.1666, lr_0 = 1.7830e-04
Loss = 5.2660e-03, PNorm = 183.0590, GNorm = 0.1870, lr_0 = 1.7818e-04
Loss = 5.4334e-03, PNorm = 183.0618, GNorm = 0.1206, lr_0 = 1.7806e-04
Loss = 5.6418e-03, PNorm = 183.0647, GNorm = 0.1579, lr_0 = 1.7794e-04
Loss = 8.1243e-03, PNorm = 183.0694, GNorm = 0.2429, lr_0 = 1.7782e-04
Validation mae = 0.120787
Epoch 23
Loss = 2.9663e-03, PNorm = 183.0739, GNorm = 0.3416, lr_0 = 1.7769e-04
Loss = 2.7406e-03, PNorm = 183.0776, GNorm = 0.0559, lr_0 = 1.7757e-04
Loss = 3.3280e-03, PNorm = 183.0811, GNorm = 0.0761, lr_0 = 1.7745e-04
Loss = 4.5336e-03, PNorm = 183.0829, GNorm = 0.1676, lr_0 = 1.7733e-04
Loss = 3.7643e-03, PNorm = 183.0868, GNorm = 0.2654, lr_0 = 1.7721e-04
Loss = 3.5720e-03, PNorm = 183.0905, GNorm = 0.0879, lr_0 = 1.7709e-04
Loss = 3.5944e-03, PNorm = 183.0931, GNorm = 0.0533, lr_0 = 1.7696e-04
Loss = 3.2793e-03, PNorm = 183.0973, GNorm = 0.0645, lr_0 = 1.7684e-04
Loss = 4.7872e-03, PNorm = 183.1003, GNorm = 0.2197, lr_0 = 1.7672e-04
Loss = 5.9245e-03, PNorm = 183.1055, GNorm = 0.0955, lr_0 = 1.7660e-04
Loss = 3.9860e-03, PNorm = 183.1096, GNorm = 0.0716, lr_0 = 1.7648e-04
Loss = 5.9901e-03, PNorm = 183.1132, GNorm = 0.2795, lr_0 = 1.7636e-04
Loss = 4.2073e-03, PNorm = 183.1164, GNorm = 0.2299, lr_0 = 1.7624e-04
Loss = 3.7948e-03, PNorm = 183.1187, GNorm = 0.0665, lr_0 = 1.7612e-04
Loss = 3.4355e-03, PNorm = 183.1198, GNorm = 0.2970, lr_0 = 1.7600e-04
Loss = 3.0607e-03, PNorm = 183.1216, GNorm = 0.1552, lr_0 = 1.7588e-04
Loss = 6.1311e-03, PNorm = 183.1252, GNorm = 0.1280, lr_0 = 1.7576e-04
Loss = 2.8951e-03, PNorm = 183.1279, GNorm = 0.2499, lr_0 = 1.7564e-04
Loss = 3.7490e-03, PNorm = 183.1325, GNorm = 0.0868, lr_0 = 1.7552e-04
Loss = 3.4980e-03, PNorm = 183.1360, GNorm = 0.1767, lr_0 = 1.7540e-04
Loss = 4.7903e-03, PNorm = 183.1401, GNorm = 0.1015, lr_0 = 1.7528e-04
Loss = 4.2397e-03, PNorm = 183.1419, GNorm = 0.1180, lr_0 = 1.7516e-04
Loss = 5.0862e-03, PNorm = 183.1450, GNorm = 0.2869, lr_0 = 1.7504e-04
Loss = 2.4890e-03, PNorm = 183.1465, GNorm = 0.2089, lr_0 = 1.7492e-04
Loss = 3.0965e-03, PNorm = 183.1505, GNorm = 0.0740, lr_0 = 1.7480e-04
Loss = 3.7745e-03, PNorm = 183.1529, GNorm = 0.1289, lr_0 = 1.7468e-04
Loss = 2.9231e-03, PNorm = 183.1548, GNorm = 0.1796, lr_0 = 1.7456e-04
Loss = 2.4305e-03, PNorm = 183.1584, GNorm = 0.0909, lr_0 = 1.7444e-04
Loss = 3.0415e-03, PNorm = 183.1621, GNorm = 0.0682, lr_0 = 1.7432e-04
Loss = 5.3694e-03, PNorm = 183.1665, GNorm = 0.1445, lr_0 = 1.7420e-04
Loss = 8.8950e-03, PNorm = 183.1707, GNorm = 0.2453, lr_0 = 1.7408e-04
Loss = 2.2312e-03, PNorm = 183.1755, GNorm = 0.1655, lr_0 = 1.7396e-04
Loss = 4.6656e-03, PNorm = 183.1792, GNorm = 0.1078, lr_0 = 1.7384e-04
Loss = 2.6109e-03, PNorm = 183.1814, GNorm = 0.0704, lr_0 = 1.7372e-04
Loss = 3.3814e-03, PNorm = 183.1845, GNorm = 0.2236, lr_0 = 1.7360e-04
Loss = 2.1318e-03, PNorm = 183.1886, GNorm = 0.1543, lr_0 = 1.7348e-04
Loss = 5.2073e-03, PNorm = 183.1897, GNorm = 0.1152, lr_0 = 1.7336e-04
Loss = 7.3621e-03, PNorm = 183.1934, GNorm = 0.2922, lr_0 = 1.7325e-04
Loss = 4.1204e-03, PNorm = 183.1966, GNorm = 0.0980, lr_0 = 1.7313e-04
Loss = 7.1634e-03, PNorm = 183.1982, GNorm = 0.1612, lr_0 = 1.7301e-04
Loss = 5.8397e-03, PNorm = 183.2025, GNorm = 0.2302, lr_0 = 1.7289e-04
Loss = 3.1835e-03, PNorm = 183.2073, GNorm = 0.1451, lr_0 = 1.7277e-04
Loss = 3.4642e-03, PNorm = 183.2106, GNorm = 0.1400, lr_0 = 1.7265e-04
Loss = 4.1058e-03, PNorm = 183.2146, GNorm = 0.0867, lr_0 = 1.7253e-04
Loss = 4.4059e-03, PNorm = 183.2171, GNorm = 0.2530, lr_0 = 1.7242e-04
Loss = 5.0632e-03, PNorm = 183.2216, GNorm = 0.1017, lr_0 = 1.7230e-04
Loss = 4.5791e-03, PNorm = 183.2262, GNorm = 0.1428, lr_0 = 1.7218e-04
Loss = 4.0429e-03, PNorm = 183.2286, GNorm = 0.1141, lr_0 = 1.7206e-04
Loss = 2.1110e-03, PNorm = 183.2305, GNorm = 0.0699, lr_0 = 1.7194e-04
Loss = 2.8772e-03, PNorm = 183.2316, GNorm = 0.0565, lr_0 = 1.7183e-04
Loss = 2.6934e-03, PNorm = 183.2340, GNorm = 0.1291, lr_0 = 1.7171e-04
Loss = 5.1866e-03, PNorm = 183.2382, GNorm = 0.1201, lr_0 = 1.7159e-04
Loss = 4.0869e-03, PNorm = 183.2430, GNorm = 0.1376, lr_0 = 1.7147e-04
Loss = 3.2703e-03, PNorm = 183.2476, GNorm = 0.1145, lr_0 = 1.7136e-04
Loss = 9.5487e-03, PNorm = 183.2514, GNorm = 0.1936, lr_0 = 1.7124e-04
Loss = 5.0201e-03, PNorm = 183.2553, GNorm = 0.2736, lr_0 = 1.7112e-04
Loss = 4.1740e-03, PNorm = 183.2586, GNorm = 0.0838, lr_0 = 1.7100e-04
Loss = 3.2254e-03, PNorm = 183.2612, GNorm = 0.0915, lr_0 = 1.7089e-04
Loss = 4.0011e-03, PNorm = 183.2638, GNorm = 0.0957, lr_0 = 1.7077e-04
Loss = 5.4273e-03, PNorm = 183.2665, GNorm = 0.1072, lr_0 = 1.7065e-04
Loss = 6.4197e-03, PNorm = 183.2674, GNorm = 0.2762, lr_0 = 1.7054e-04
Loss = 3.0264e-03, PNorm = 183.2697, GNorm = 0.0795, lr_0 = 1.7042e-04
Loss = 1.0009e-02, PNorm = 183.2723, GNorm = 0.0718, lr_0 = 1.7030e-04
Loss = 2.4072e-03, PNorm = 183.2752, GNorm = 0.0705, lr_0 = 1.7019e-04
Loss = 4.2538e-03, PNorm = 183.2792, GNorm = 0.0927, lr_0 = 1.7007e-04
Loss = 1.0272e-02, PNorm = 183.2840, GNorm = 0.2678, lr_0 = 1.6995e-04
Loss = 7.9629e-03, PNorm = 183.2866, GNorm = 0.0575, lr_0 = 1.6984e-04
Loss = 2.9097e-03, PNorm = 183.2895, GNorm = 0.1510, lr_0 = 1.6972e-04
Loss = 4.0141e-03, PNorm = 183.2931, GNorm = 0.1206, lr_0 = 1.6960e-04
Loss = 4.9418e-03, PNorm = 183.2971, GNorm = 0.1303, lr_0 = 1.6949e-04
Loss = 4.6001e-03, PNorm = 183.2990, GNorm = 0.1989, lr_0 = 1.6937e-04
Loss = 7.0448e-03, PNorm = 183.3019, GNorm = 0.1548, lr_0 = 1.6926e-04
Loss = 6.0154e-03, PNorm = 183.3042, GNorm = 0.1750, lr_0 = 1.6914e-04
Loss = 2.1992e-03, PNorm = 183.3067, GNorm = 0.1141, lr_0 = 1.6902e-04
Loss = 5.4536e-03, PNorm = 183.3101, GNorm = 0.1596, lr_0 = 1.6891e-04
Loss = 4.1675e-03, PNorm = 183.3119, GNorm = 0.1905, lr_0 = 1.6879e-04
Loss = 6.2777e-03, PNorm = 183.3144, GNorm = 0.2455, lr_0 = 1.6868e-04
Loss = 4.9114e-03, PNorm = 183.3174, GNorm = 0.1172, lr_0 = 1.6856e-04
Loss = 6.4344e-03, PNorm = 183.3204, GNorm = 0.1397, lr_0 = 1.6845e-04
Loss = 4.4154e-03, PNorm = 183.3254, GNorm = 0.1087, lr_0 = 1.6833e-04
Loss = 4.2323e-03, PNorm = 183.3275, GNorm = 0.0958, lr_0 = 1.6821e-04
Loss = 4.2370e-03, PNorm = 183.3312, GNorm = 0.0887, lr_0 = 1.6810e-04
Loss = 4.0956e-03, PNorm = 183.3346, GNorm = 0.1675, lr_0 = 1.6798e-04
Loss = 6.2646e-03, PNorm = 183.3373, GNorm = 0.1010, lr_0 = 1.6787e-04
Loss = 3.6154e-03, PNorm = 183.3423, GNorm = 0.1073, lr_0 = 1.6775e-04
Loss = 2.1345e-03, PNorm = 183.3465, GNorm = 0.0666, lr_0 = 1.6764e-04
Loss = 3.0749e-03, PNorm = 183.3496, GNorm = 0.0658, lr_0 = 1.6752e-04
Loss = 3.0819e-03, PNorm = 183.3513, GNorm = 0.1021, lr_0 = 1.6741e-04
Loss = 2.6069e-03, PNorm = 183.3547, GNorm = 0.1094, lr_0 = 1.6729e-04
Loss = 3.2132e-03, PNorm = 183.3579, GNorm = 0.1097, lr_0 = 1.6718e-04
Loss = 7.3823e-03, PNorm = 183.3624, GNorm = 0.1134, lr_0 = 1.6707e-04
Loss = 3.2781e-03, PNorm = 183.3668, GNorm = 0.0818, lr_0 = 1.6695e-04
Loss = 4.8543e-03, PNorm = 183.3702, GNorm = 0.2526, lr_0 = 1.6684e-04
Loss = 4.1981e-03, PNorm = 183.3735, GNorm = 0.1623, lr_0 = 1.6672e-04
Loss = 5.6330e-03, PNorm = 183.3775, GNorm = 0.3106, lr_0 = 1.6661e-04
Loss = 9.1126e-03, PNorm = 183.3799, GNorm = 0.1503, lr_0 = 1.6649e-04
Loss = 7.7352e-03, PNorm = 183.3823, GNorm = 0.1076, lr_0 = 1.6638e-04
Loss = 2.9319e-03, PNorm = 183.3850, GNorm = 0.2801, lr_0 = 1.6627e-04
Loss = 1.8513e-02, PNorm = 183.3885, GNorm = 0.2620, lr_0 = 1.6615e-04
Loss = 4.9179e-03, PNorm = 183.3907, GNorm = 0.1256, lr_0 = 1.6604e-04
Loss = 3.2248e-03, PNorm = 183.3931, GNorm = 0.1225, lr_0 = 1.6592e-04
Loss = 3.4824e-03, PNorm = 183.3965, GNorm = 0.0893, lr_0 = 1.6581e-04
Loss = 3.5386e-03, PNorm = 183.4011, GNorm = 0.2669, lr_0 = 1.6570e-04
Loss = 3.8006e-03, PNorm = 183.4050, GNorm = 0.1691, lr_0 = 1.6558e-04
Loss = 2.6350e-03, PNorm = 183.4092, GNorm = 0.0621, lr_0 = 1.6547e-04
Loss = 7.6410e-03, PNorm = 183.4129, GNorm = 0.1076, lr_0 = 1.6536e-04
Loss = 4.2262e-03, PNorm = 183.4156, GNorm = 0.1113, lr_0 = 1.6524e-04
Loss = 2.6161e-03, PNorm = 183.4176, GNorm = 0.1498, lr_0 = 1.6513e-04
Loss = 3.7541e-03, PNorm = 183.4200, GNorm = 0.0978, lr_0 = 1.6502e-04
Loss = 3.7672e-03, PNorm = 183.4229, GNorm = 0.1958, lr_0 = 1.6490e-04
Loss = 4.9296e-03, PNorm = 183.4277, GNorm = 0.2817, lr_0 = 1.6479e-04
Loss = 6.6187e-03, PNorm = 183.4303, GNorm = 0.2670, lr_0 = 1.6468e-04
Loss = 9.4521e-03, PNorm = 183.4322, GNorm = 0.3864, lr_0 = 1.6457e-04
Loss = 2.7425e-03, PNorm = 183.4357, GNorm = 0.1185, lr_0 = 1.6445e-04
Loss = 9.7027e-03, PNorm = 183.4382, GNorm = 0.0958, lr_0 = 1.6434e-04
Loss = 3.2312e-03, PNorm = 183.4405, GNorm = 0.1514, lr_0 = 1.6423e-04
Loss = 5.9426e-03, PNorm = 183.4437, GNorm = 0.1352, lr_0 = 1.6412e-04
Loss = 3.6181e-03, PNorm = 183.4461, GNorm = 0.2181, lr_0 = 1.6400e-04
Loss = 6.0113e-03, PNorm = 183.4497, GNorm = 0.2204, lr_0 = 1.6389e-04
Loss = 2.6221e-03, PNorm = 183.4527, GNorm = 0.0740, lr_0 = 1.6378e-04
Validation mae = 0.120641
Epoch 24
Loss = 4.6910e-03, PNorm = 183.4580, GNorm = 0.2498, lr_0 = 1.6367e-04
Loss = 4.4854e-03, PNorm = 183.4617, GNorm = 0.1047, lr_0 = 1.6355e-04
Loss = 4.9776e-03, PNorm = 183.4635, GNorm = 0.8279, lr_0 = 1.6344e-04
Loss = 2.4557e-03, PNorm = 183.4658, GNorm = 0.1939, lr_0 = 1.6333e-04
Loss = 2.2580e-03, PNorm = 183.4677, GNorm = 0.1087, lr_0 = 1.6322e-04
Loss = 3.8256e-03, PNorm = 183.4710, GNorm = 0.1408, lr_0 = 1.6311e-04
Loss = 2.4275e-03, PNorm = 183.4732, GNorm = 0.0632, lr_0 = 1.6299e-04
Loss = 5.4133e-03, PNorm = 183.4752, GNorm = 0.1149, lr_0 = 1.6288e-04
Loss = 2.3802e-03, PNorm = 183.4790, GNorm = 0.1698, lr_0 = 1.6277e-04
Loss = 2.9511e-03, PNorm = 183.4835, GNorm = 0.0861, lr_0 = 1.6266e-04
Loss = 2.8027e-03, PNorm = 183.4856, GNorm = 0.1658, lr_0 = 1.6255e-04
Loss = 3.3003e-03, PNorm = 183.4869, GNorm = 0.1066, lr_0 = 1.6244e-04
Loss = 2.3921e-03, PNorm = 183.4896, GNorm = 0.0874, lr_0 = 1.6233e-04
Loss = 2.5760e-03, PNorm = 183.4923, GNorm = 0.1281, lr_0 = 1.6221e-04
Loss = 6.5260e-03, PNorm = 183.4943, GNorm = 0.1632, lr_0 = 1.6210e-04
Loss = 3.9566e-03, PNorm = 183.4970, GNorm = 0.3072, lr_0 = 1.6199e-04
Loss = 3.3373e-03, PNorm = 183.5000, GNorm = 0.1686, lr_0 = 1.6188e-04
Loss = 3.3643e-03, PNorm = 183.5026, GNorm = 0.1808, lr_0 = 1.6177e-04
Loss = 2.5310e-03, PNorm = 183.5048, GNorm = 0.2302, lr_0 = 1.6166e-04
Loss = 4.4412e-03, PNorm = 183.5068, GNorm = 0.1229, lr_0 = 1.6155e-04
Loss = 4.9659e-03, PNorm = 183.5093, GNorm = 0.1251, lr_0 = 1.6144e-04
Loss = 4.0169e-03, PNorm = 183.5115, GNorm = 0.0857, lr_0 = 1.6133e-04
Loss = 4.3143e-03, PNorm = 183.5147, GNorm = 0.1097, lr_0 = 1.6122e-04
Loss = 6.1561e-03, PNorm = 183.5178, GNorm = 0.1813, lr_0 = 1.6111e-04
Loss = 3.6262e-03, PNorm = 183.5205, GNorm = 0.1139, lr_0 = 1.6100e-04
Loss = 4.2296e-03, PNorm = 183.5230, GNorm = 0.0930, lr_0 = 1.6089e-04
Loss = 7.2575e-03, PNorm = 183.5265, GNorm = 0.1554, lr_0 = 1.6078e-04
Loss = 5.2116e-03, PNorm = 183.5275, GNorm = 0.1785, lr_0 = 1.6067e-04
Loss = 3.2171e-03, PNorm = 183.5288, GNorm = 0.1194, lr_0 = 1.6056e-04
Loss = 5.4332e-03, PNorm = 183.5320, GNorm = 0.2540, lr_0 = 1.6045e-04
Loss = 2.7438e-03, PNorm = 183.5359, GNorm = 0.0978, lr_0 = 1.6034e-04
Loss = 4.9572e-03, PNorm = 183.5381, GNorm = 0.0849, lr_0 = 1.6023e-04
Loss = 3.2152e-03, PNorm = 183.5407, GNorm = 0.1573, lr_0 = 1.6012e-04
Loss = 3.4630e-03, PNorm = 183.5419, GNorm = 0.0751, lr_0 = 1.6001e-04
Loss = 5.1499e-03, PNorm = 183.5435, GNorm = 0.0955, lr_0 = 1.5990e-04
Loss = 3.3817e-03, PNorm = 183.5466, GNorm = 0.1510, lr_0 = 1.5979e-04
Loss = 2.8829e-03, PNorm = 183.5478, GNorm = 0.1745, lr_0 = 1.5968e-04
Loss = 4.4212e-03, PNorm = 183.5508, GNorm = 0.2365, lr_0 = 1.5957e-04
Loss = 5.9571e-03, PNorm = 183.5531, GNorm = 0.1125, lr_0 = 1.5946e-04
Loss = 8.9526e-03, PNorm = 183.5549, GNorm = 0.2179, lr_0 = 1.5935e-04
Loss = 4.9711e-03, PNorm = 183.5584, GNorm = 0.2250, lr_0 = 1.5924e-04
Loss = 6.0423e-03, PNorm = 183.5603, GNorm = 0.1425, lr_0 = 1.5913e-04
Loss = 6.9771e-03, PNorm = 183.5633, GNorm = 0.3605, lr_0 = 1.5902e-04
Loss = 2.4987e-03, PNorm = 183.5674, GNorm = 0.1921, lr_0 = 1.5891e-04
Loss = 3.6054e-03, PNorm = 183.5713, GNorm = 0.2604, lr_0 = 1.5880e-04
Loss = 4.0622e-03, PNorm = 183.5733, GNorm = 0.1249, lr_0 = 1.5870e-04
Loss = 1.9736e-03, PNorm = 183.5739, GNorm = 0.1595, lr_0 = 1.5859e-04
Loss = 5.7916e-03, PNorm = 183.5760, GNorm = 0.2181, lr_0 = 1.5848e-04
Loss = 6.7560e-03, PNorm = 183.5787, GNorm = 0.2170, lr_0 = 1.5837e-04
Loss = 4.2865e-03, PNorm = 183.5828, GNorm = 0.1274, lr_0 = 1.5826e-04
Loss = 3.1992e-03, PNorm = 183.5851, GNorm = 0.0679, lr_0 = 1.5815e-04
Loss = 2.0430e-03, PNorm = 183.5886, GNorm = 0.0708, lr_0 = 1.5804e-04
Loss = 3.7739e-03, PNorm = 183.5916, GNorm = 0.1306, lr_0 = 1.5794e-04
Loss = 2.2476e-03, PNorm = 183.5949, GNorm = 0.1241, lr_0 = 1.5783e-04
Loss = 3.5024e-03, PNorm = 183.5980, GNorm = 0.1942, lr_0 = 1.5772e-04
Loss = 2.5719e-03, PNorm = 183.6027, GNorm = 0.0637, lr_0 = 1.5761e-04
Loss = 5.5072e-03, PNorm = 183.6061, GNorm = 0.0747, lr_0 = 1.5750e-04
Loss = 3.2722e-03, PNorm = 183.6086, GNorm = 0.1137, lr_0 = 1.5740e-04
Loss = 4.1152e-03, PNorm = 183.6099, GNorm = 0.1819, lr_0 = 1.5729e-04
Loss = 1.0015e-02, PNorm = 183.6113, GNorm = 0.2111, lr_0 = 1.5718e-04
Loss = 5.2965e-03, PNorm = 183.6139, GNorm = 0.1492, lr_0 = 1.5707e-04
Loss = 5.4623e-03, PNorm = 183.6176, GNorm = 0.1115, lr_0 = 1.5697e-04
Loss = 2.2812e-03, PNorm = 183.6209, GNorm = 0.0887, lr_0 = 1.5686e-04
Loss = 3.4840e-03, PNorm = 183.6250, GNorm = 0.1291, lr_0 = 1.5675e-04
Loss = 5.5689e-03, PNorm = 183.6296, GNorm = 0.1525, lr_0 = 1.5664e-04
Loss = 2.7885e-03, PNorm = 183.6335, GNorm = 0.0737, lr_0 = 1.5654e-04
Loss = 4.4985e-03, PNorm = 183.6365, GNorm = 0.1701, lr_0 = 1.5643e-04
Loss = 3.8837e-03, PNorm = 183.6397, GNorm = 0.1896, lr_0 = 1.5632e-04
Loss = 3.4869e-03, PNorm = 183.6422, GNorm = 0.0726, lr_0 = 1.5621e-04
Loss = 2.8258e-03, PNorm = 183.6451, GNorm = 0.1285, lr_0 = 1.5611e-04
Loss = 6.7335e-03, PNorm = 183.6485, GNorm = 0.1640, lr_0 = 1.5600e-04
Loss = 6.9735e-03, PNorm = 183.6499, GNorm = 0.1783, lr_0 = 1.5589e-04
Loss = 2.3150e-03, PNorm = 183.6523, GNorm = 0.1593, lr_0 = 1.5579e-04
Loss = 5.2884e-03, PNorm = 183.6569, GNorm = 0.1581, lr_0 = 1.5568e-04
Loss = 2.6986e-03, PNorm = 183.6598, GNorm = 0.0988, lr_0 = 1.5557e-04
Loss = 4.0214e-03, PNorm = 183.6626, GNorm = 0.3378, lr_0 = 1.5547e-04
Loss = 4.1108e-03, PNorm = 183.6648, GNorm = 0.1129, lr_0 = 1.5536e-04
Loss = 2.2360e-03, PNorm = 183.6663, GNorm = 0.1728, lr_0 = 1.5525e-04
Loss = 2.5965e-03, PNorm = 183.6708, GNorm = 0.0750, lr_0 = 1.5515e-04
Loss = 2.2139e-03, PNorm = 183.6752, GNorm = 0.2626, lr_0 = 1.5504e-04
Loss = 4.6698e-03, PNorm = 183.6784, GNorm = 0.0569, lr_0 = 1.5493e-04
Loss = 4.3302e-03, PNorm = 183.6812, GNorm = 0.2380, lr_0 = 1.5483e-04
Loss = 2.1091e-03, PNorm = 183.6842, GNorm = 0.1379, lr_0 = 1.5472e-04
Loss = 3.2532e-03, PNorm = 183.6857, GNorm = 0.0749, lr_0 = 1.5462e-04
Loss = 2.9155e-03, PNorm = 183.6877, GNorm = 0.0942, lr_0 = 1.5451e-04
Loss = 3.5521e-03, PNorm = 183.6899, GNorm = 0.1296, lr_0 = 1.5440e-04
Loss = 5.1220e-03, PNorm = 183.6920, GNorm = 0.1002, lr_0 = 1.5430e-04
Loss = 2.5074e-03, PNorm = 183.6958, GNorm = 0.1791, lr_0 = 1.5419e-04
Loss = 3.6613e-03, PNorm = 183.6979, GNorm = 0.2313, lr_0 = 1.5409e-04
Loss = 4.9368e-03, PNorm = 183.6988, GNorm = 0.1655, lr_0 = 1.5398e-04
Loss = 6.1108e-03, PNorm = 183.7007, GNorm = 0.0809, lr_0 = 1.5388e-04
Loss = 4.1983e-03, PNorm = 183.7027, GNorm = 0.1772, lr_0 = 1.5377e-04
Loss = 2.1839e-03, PNorm = 183.7052, GNorm = 0.0663, lr_0 = 1.5367e-04
Loss = 1.9262e-03, PNorm = 183.7069, GNorm = 0.1272, lr_0 = 1.5356e-04
Loss = 2.6992e-03, PNorm = 183.7099, GNorm = 0.1554, lr_0 = 1.5346e-04
Loss = 3.4500e-03, PNorm = 183.7131, GNorm = 0.0699, lr_0 = 1.5335e-04
Loss = 3.9408e-03, PNorm = 183.7170, GNorm = 0.1389, lr_0 = 1.5325e-04
Loss = 3.7955e-03, PNorm = 183.7177, GNorm = 0.2824, lr_0 = 1.5314e-04
Loss = 5.3456e-03, PNorm = 183.7204, GNorm = 0.0718, lr_0 = 1.5304e-04
Loss = 3.0903e-03, PNorm = 183.7223, GNorm = 0.1517, lr_0 = 1.5293e-04
Loss = 4.3742e-03, PNorm = 183.7244, GNorm = 0.0761, lr_0 = 1.5283e-04
Loss = 3.7559e-03, PNorm = 183.7264, GNorm = 0.1005, lr_0 = 1.5272e-04
Loss = 2.3646e-03, PNorm = 183.7274, GNorm = 0.1347, lr_0 = 1.5262e-04
Loss = 3.4437e-03, PNorm = 183.7295, GNorm = 0.0979, lr_0 = 1.5251e-04
Loss = 4.7100e-03, PNorm = 183.7332, GNorm = 0.1440, lr_0 = 1.5241e-04
Loss = 6.3331e-03, PNorm = 183.7360, GNorm = 0.1444, lr_0 = 1.5230e-04
Loss = 8.7352e-03, PNorm = 183.7392, GNorm = 0.1677, lr_0 = 1.5220e-04
Loss = 3.6168e-03, PNorm = 183.7436, GNorm = 0.1329, lr_0 = 1.5209e-04
Loss = 8.0511e-03, PNorm = 183.7482, GNorm = 0.1544, lr_0 = 1.5199e-04
Loss = 2.5326e-03, PNorm = 183.7522, GNorm = 0.0646, lr_0 = 1.5189e-04
Loss = 2.3317e-03, PNorm = 183.7545, GNorm = 0.1443, lr_0 = 1.5178e-04
Loss = 7.5987e-03, PNorm = 183.7568, GNorm = 0.0780, lr_0 = 1.5168e-04
Loss = 6.4748e-03, PNorm = 183.7570, GNorm = 0.2133, lr_0 = 1.5157e-04
Loss = 7.9830e-03, PNorm = 183.7614, GNorm = 0.0618, lr_0 = 1.5147e-04
Loss = 2.0010e-03, PNorm = 183.7641, GNorm = 0.3201, lr_0 = 1.5137e-04
Loss = 3.3985e-03, PNorm = 183.7659, GNorm = 0.0635, lr_0 = 1.5126e-04
Loss = 3.2644e-03, PNorm = 183.7685, GNorm = 0.0859, lr_0 = 1.5116e-04
Loss = 1.5554e-02, PNorm = 183.7723, GNorm = 0.2328, lr_0 = 1.5106e-04
Loss = 4.4156e-03, PNorm = 183.7731, GNorm = 0.1449, lr_0 = 1.5095e-04
Loss = 2.3793e-03, PNorm = 183.7742, GNorm = 0.2873, lr_0 = 1.5085e-04
Validation mae = 0.120490
Epoch 25
Loss = 4.5734e-03, PNorm = 183.7754, GNorm = 0.0704, lr_0 = 1.5075e-04
Loss = 8.2892e-03, PNorm = 183.7783, GNorm = 0.2467, lr_0 = 1.5064e-04
Loss = 3.2381e-03, PNorm = 183.7808, GNorm = 0.0552, lr_0 = 1.5054e-04
Loss = 2.8380e-03, PNorm = 183.7826, GNorm = 0.0733, lr_0 = 1.5044e-04
Loss = 1.5678e-03, PNorm = 183.7848, GNorm = 0.0762, lr_0 = 1.5033e-04
Loss = 3.8043e-03, PNorm = 183.7864, GNorm = 0.1127, lr_0 = 1.5023e-04
Loss = 2.7630e-03, PNorm = 183.7890, GNorm = 0.1896, lr_0 = 1.5013e-04
Loss = 2.4856e-03, PNorm = 183.7907, GNorm = 0.1122, lr_0 = 1.5002e-04
Loss = 2.3192e-03, PNorm = 183.7922, GNorm = 0.1022, lr_0 = 1.4992e-04
Loss = 2.4993e-03, PNorm = 183.7943, GNorm = 0.0771, lr_0 = 1.4982e-04
Loss = 2.1581e-03, PNorm = 183.7953, GNorm = 0.0896, lr_0 = 1.4972e-04
Loss = 1.9546e-03, PNorm = 183.7967, GNorm = 0.0905, lr_0 = 1.4961e-04
Loss = 3.2338e-03, PNorm = 183.7985, GNorm = 0.1072, lr_0 = 1.4951e-04
Loss = 9.6169e-03, PNorm = 183.8014, GNorm = 0.2565, lr_0 = 1.4941e-04
Loss = 4.3356e-03, PNorm = 183.8040, GNorm = 0.0620, lr_0 = 1.4931e-04
Loss = 2.9429e-03, PNorm = 183.8062, GNorm = 0.1040, lr_0 = 1.4920e-04
Loss = 2.4939e-03, PNorm = 183.8079, GNorm = 0.2067, lr_0 = 1.4910e-04
Loss = 3.9378e-03, PNorm = 183.8082, GNorm = 0.1082, lr_0 = 1.4900e-04
Loss = 1.6395e-03, PNorm = 183.8094, GNorm = 0.1512, lr_0 = 1.4890e-04
Loss = 3.1396e-03, PNorm = 183.8105, GNorm = 0.1138, lr_0 = 1.4880e-04
Loss = 5.6331e-03, PNorm = 183.8119, GNorm = 0.0814, lr_0 = 1.4869e-04
Loss = 5.3348e-03, PNorm = 183.8143, GNorm = 0.2030, lr_0 = 1.4859e-04
Loss = 5.7544e-03, PNorm = 183.8157, GNorm = 0.0696, lr_0 = 1.4849e-04
Loss = 1.8798e-03, PNorm = 183.8178, GNorm = 0.0890, lr_0 = 1.4839e-04
Loss = 2.8634e-03, PNorm = 183.8200, GNorm = 0.2843, lr_0 = 1.4829e-04
Loss = 2.3120e-03, PNorm = 183.8228, GNorm = 0.1439, lr_0 = 1.4818e-04
Loss = 5.6997e-03, PNorm = 183.8241, GNorm = 0.0683, lr_0 = 1.4808e-04
Loss = 3.8158e-03, PNorm = 183.8276, GNorm = 0.0906, lr_0 = 1.4798e-04
Loss = 2.9562e-03, PNorm = 183.8302, GNorm = 0.0777, lr_0 = 1.4788e-04
Loss = 1.8992e-03, PNorm = 183.8322, GNorm = 0.0628, lr_0 = 1.4778e-04
Loss = 2.5315e-03, PNorm = 183.8344, GNorm = 0.1082, lr_0 = 1.4768e-04
Loss = 2.6942e-03, PNorm = 183.8364, GNorm = 0.1406, lr_0 = 1.4758e-04
Loss = 4.6769e-03, PNorm = 183.8384, GNorm = 0.1319, lr_0 = 1.4748e-04
Loss = 3.3555e-03, PNorm = 183.8412, GNorm = 0.0753, lr_0 = 1.4737e-04
Loss = 2.4435e-03, PNorm = 183.8441, GNorm = 0.0377, lr_0 = 1.4727e-04
Loss = 1.7526e-03, PNorm = 183.8459, GNorm = 0.1012, lr_0 = 1.4717e-04
Loss = 1.6917e-03, PNorm = 183.8495, GNorm = 0.1181, lr_0 = 1.4707e-04
Loss = 1.7534e-03, PNorm = 183.8516, GNorm = 0.0831, lr_0 = 1.4697e-04
Loss = 1.9320e-03, PNorm = 183.8527, GNorm = 0.0830, lr_0 = 1.4687e-04
Loss = 2.6697e-03, PNorm = 183.8549, GNorm = 0.0837, lr_0 = 1.4677e-04
Loss = 3.7110e-03, PNorm = 183.8565, GNorm = 0.1924, lr_0 = 1.4667e-04
Loss = 5.0904e-03, PNorm = 183.8588, GNorm = 0.0452, lr_0 = 1.4657e-04
Loss = 1.8882e-03, PNorm = 183.8603, GNorm = 0.1059, lr_0 = 1.4647e-04
Loss = 3.3017e-03, PNorm = 183.8636, GNorm = 0.0855, lr_0 = 1.4637e-04
Loss = 2.1404e-03, PNorm = 183.8657, GNorm = 0.0860, lr_0 = 1.4627e-04
Loss = 4.6926e-03, PNorm = 183.8673, GNorm = 0.2164, lr_0 = 1.4617e-04
Loss = 4.0018e-03, PNorm = 183.8690, GNorm = 0.1073, lr_0 = 1.4607e-04
Loss = 4.1360e-03, PNorm = 183.8713, GNorm = 0.0646, lr_0 = 1.4597e-04
Loss = 3.4347e-03, PNorm = 183.8741, GNorm = 0.5161, lr_0 = 1.4587e-04
Loss = 4.2962e-03, PNorm = 183.8769, GNorm = 0.0933, lr_0 = 1.4577e-04
Loss = 2.1146e-03, PNorm = 183.8798, GNorm = 0.0926, lr_0 = 1.4567e-04
Loss = 4.0744e-03, PNorm = 183.8828, GNorm = 0.0858, lr_0 = 1.4557e-04
Loss = 4.4218e-03, PNorm = 183.8836, GNorm = 0.0691, lr_0 = 1.4547e-04
Loss = 1.9096e-03, PNorm = 183.8853, GNorm = 0.1590, lr_0 = 1.4537e-04
Loss = 3.7297e-03, PNorm = 183.8890, GNorm = 0.0847, lr_0 = 1.4527e-04
Loss = 1.5530e-03, PNorm = 183.8920, GNorm = 0.1012, lr_0 = 1.4517e-04
Loss = 3.0058e-03, PNorm = 183.8947, GNorm = 0.0631, lr_0 = 1.4507e-04
Loss = 2.1590e-03, PNorm = 183.8960, GNorm = 0.0853, lr_0 = 1.4497e-04
Loss = 3.6642e-03, PNorm = 183.8960, GNorm = 0.1007, lr_0 = 1.4487e-04
Loss = 3.9026e-03, PNorm = 183.8964, GNorm = 0.0990, lr_0 = 1.4477e-04
Loss = 5.8009e-03, PNorm = 183.8991, GNorm = 0.7062, lr_0 = 1.4467e-04
Loss = 2.8289e-03, PNorm = 183.8998, GNorm = 0.0969, lr_0 = 1.4457e-04
Loss = 5.9326e-03, PNorm = 183.9025, GNorm = 0.1660, lr_0 = 1.4447e-04
Loss = 1.8781e-03, PNorm = 183.9049, GNorm = 0.1163, lr_0 = 1.4438e-04
Loss = 3.3329e-03, PNorm = 183.9082, GNorm = 0.0915, lr_0 = 1.4428e-04
Loss = 4.1670e-03, PNorm = 183.9116, GNorm = 0.0716, lr_0 = 1.4418e-04
Loss = 2.1786e-03, PNorm = 183.9135, GNorm = 0.1129, lr_0 = 1.4408e-04
Loss = 1.5252e-02, PNorm = 183.9157, GNorm = 2.9719, lr_0 = 1.4398e-04
Loss = 9.4274e-03, PNorm = 183.9204, GNorm = 0.2286, lr_0 = 1.4388e-04
Loss = 5.5310e-03, PNorm = 183.9218, GNorm = 0.1760, lr_0 = 1.4378e-04
Loss = 4.2468e-03, PNorm = 183.9249, GNorm = 0.1854, lr_0 = 1.4368e-04
Loss = 3.3108e-03, PNorm = 183.9287, GNorm = 0.0787, lr_0 = 1.4359e-04
Loss = 2.7659e-03, PNorm = 183.9308, GNorm = 0.1421, lr_0 = 1.4349e-04
Loss = 6.7935e-03, PNorm = 183.9337, GNorm = 0.2473, lr_0 = 1.4339e-04
Loss = 2.8563e-03, PNorm = 183.9365, GNorm = 0.1416, lr_0 = 1.4329e-04
Loss = 9.4130e-03, PNorm = 183.9397, GNorm = 0.1543, lr_0 = 1.4319e-04
Loss = 5.5308e-03, PNorm = 183.9416, GNorm = 0.1453, lr_0 = 1.4310e-04
Loss = 4.5557e-03, PNorm = 183.9429, GNorm = 0.1304, lr_0 = 1.4300e-04
Loss = 1.6534e-03, PNorm = 183.9450, GNorm = 0.0721, lr_0 = 1.4290e-04
Loss = 2.3959e-03, PNorm = 183.9464, GNorm = 0.0510, lr_0 = 1.4280e-04
Loss = 6.1411e-03, PNorm = 183.9474, GNorm = 0.1408, lr_0 = 1.4270e-04
Loss = 2.3812e-03, PNorm = 183.9482, GNorm = 0.1285, lr_0 = 1.4261e-04
Loss = 4.6154e-03, PNorm = 183.9503, GNorm = 0.0492, lr_0 = 1.4251e-04
Loss = 3.1634e-03, PNorm = 183.9533, GNorm = 0.0905, lr_0 = 1.4241e-04
Loss = 2.4436e-03, PNorm = 183.9555, GNorm = 0.0927, lr_0 = 1.4231e-04
Loss = 2.3767e-03, PNorm = 183.9592, GNorm = 0.0911, lr_0 = 1.4222e-04
Loss = 2.9159e-03, PNorm = 183.9609, GNorm = 0.0976, lr_0 = 1.4212e-04
Loss = 2.2726e-03, PNorm = 183.9634, GNorm = 0.1334, lr_0 = 1.4202e-04
Loss = 3.6372e-03, PNorm = 183.9651, GNorm = 0.0482, lr_0 = 1.4192e-04
Loss = 2.7779e-03, PNorm = 183.9675, GNorm = 0.2300, lr_0 = 1.4183e-04
Loss = 1.1487e-02, PNorm = 183.9698, GNorm = 0.4454, lr_0 = 1.4173e-04
Loss = 4.8590e-03, PNorm = 183.9710, GNorm = 0.0818, lr_0 = 1.4163e-04
Loss = 5.4015e-03, PNorm = 183.9731, GNorm = 0.0825, lr_0 = 1.4153e-04
Loss = 1.9621e-03, PNorm = 183.9760, GNorm = 0.1431, lr_0 = 1.4144e-04
Loss = 1.1008e-02, PNorm = 183.9769, GNorm = 1.4712, lr_0 = 1.4134e-04
Loss = 1.0975e-02, PNorm = 183.9811, GNorm = 0.0583, lr_0 = 1.4124e-04
Loss = 1.7000e-03, PNorm = 183.9827, GNorm = 0.0841, lr_0 = 1.4115e-04
Loss = 2.5290e-03, PNorm = 183.9847, GNorm = 0.0772, lr_0 = 1.4105e-04
Loss = 2.5948e-03, PNorm = 183.9869, GNorm = 0.1120, lr_0 = 1.4095e-04
Loss = 5.1053e-03, PNorm = 183.9894, GNorm = 0.1568, lr_0 = 1.4086e-04
Loss = 5.6533e-03, PNorm = 183.9931, GNorm = 0.0921, lr_0 = 1.4076e-04
Loss = 5.9674e-03, PNorm = 183.9963, GNorm = 0.0584, lr_0 = 1.4066e-04
Loss = 4.2343e-03, PNorm = 184.0000, GNorm = 0.0708, lr_0 = 1.4057e-04
Loss = 5.3251e-03, PNorm = 184.0021, GNorm = 0.2095, lr_0 = 1.4047e-04
Loss = 2.3746e-03, PNorm = 184.0049, GNorm = 0.1124, lr_0 = 1.4038e-04
Loss = 3.8041e-03, PNorm = 184.0083, GNorm = 0.1109, lr_0 = 1.4028e-04
Loss = 1.9945e-03, PNorm = 184.0111, GNorm = 0.1717, lr_0 = 1.4018e-04
Loss = 1.4149e-03, PNorm = 184.0139, GNorm = 0.0601, lr_0 = 1.4009e-04
Loss = 2.6228e-03, PNorm = 184.0166, GNorm = 0.0954, lr_0 = 1.3999e-04
Loss = 3.0560e-03, PNorm = 184.0191, GNorm = 0.0761, lr_0 = 1.3990e-04
Loss = 7.6986e-03, PNorm = 184.0213, GNorm = 0.0515, lr_0 = 1.3980e-04
Loss = 3.5345e-03, PNorm = 184.0232, GNorm = 0.1146, lr_0 = 1.3970e-04
Loss = 3.6904e-03, PNorm = 184.0257, GNorm = 0.1942, lr_0 = 1.3961e-04
Loss = 3.3441e-03, PNorm = 184.0280, GNorm = 0.1956, lr_0 = 1.3951e-04
Loss = 3.8804e-03, PNorm = 184.0306, GNorm = 0.1237, lr_0 = 1.3942e-04
Loss = 3.7590e-03, PNorm = 184.0325, GNorm = 0.0640, lr_0 = 1.3932e-04
Loss = 1.6412e-03, PNorm = 184.0357, GNorm = 0.0624, lr_0 = 1.3923e-04
Loss = 2.5466e-03, PNorm = 184.0371, GNorm = 0.0896, lr_0 = 1.3913e-04
Loss = 3.5407e-03, PNorm = 184.0388, GNorm = 0.1890, lr_0 = 1.3904e-04
Loss = 4.4026e-03, PNorm = 184.0410, GNorm = 0.1105, lr_0 = 1.3894e-04
Validation mae = 0.120614
Epoch 26
Loss = 1.2276e-03, PNorm = 184.0431, GNorm = 0.1877, lr_0 = 1.3884e-04
Loss = 4.5092e-03, PNorm = 184.0439, GNorm = 0.0716, lr_0 = 1.3875e-04
Loss = 5.3577e-03, PNorm = 184.0442, GNorm = 0.1612, lr_0 = 1.3865e-04
Loss = 2.0673e-03, PNorm = 184.0463, GNorm = 0.0977, lr_0 = 1.3856e-04
Loss = 1.8160e-03, PNorm = 184.0482, GNorm = 0.0404, lr_0 = 1.3846e-04
Loss = 2.4193e-03, PNorm = 184.0521, GNorm = 0.1028, lr_0 = 1.3837e-04
Loss = 1.5699e-03, PNorm = 184.0534, GNorm = 0.0680, lr_0 = 1.3828e-04
Loss = 2.4969e-03, PNorm = 184.0543, GNorm = 0.0750, lr_0 = 1.3818e-04
Loss = 5.5613e-03, PNorm = 184.0571, GNorm = 0.0789, lr_0 = 1.3809e-04
Loss = 5.5551e-03, PNorm = 184.0602, GNorm = 0.1468, lr_0 = 1.3799e-04
Loss = 2.4000e-03, PNorm = 184.0610, GNorm = 0.0718, lr_0 = 1.3790e-04
Loss = 1.5078e-03, PNorm = 184.0622, GNorm = 0.0695, lr_0 = 1.3780e-04
Loss = 3.5625e-03, PNorm = 184.0628, GNorm = 0.0545, lr_0 = 1.3771e-04
Loss = 2.0225e-03, PNorm = 184.0633, GNorm = 0.1469, lr_0 = 1.3761e-04
Loss = 1.5955e-03, PNorm = 184.0631, GNorm = 0.0958, lr_0 = 1.3752e-04
Loss = 1.7777e-03, PNorm = 184.0642, GNorm = 0.1202, lr_0 = 1.3742e-04
Loss = 2.3823e-03, PNorm = 184.0659, GNorm = 0.0661, lr_0 = 1.3733e-04
Loss = 5.0579e-03, PNorm = 184.0665, GNorm = 0.1168, lr_0 = 1.3724e-04
Loss = 1.5794e-03, PNorm = 184.0673, GNorm = 0.0657, lr_0 = 1.3714e-04
Loss = 2.6376e-03, PNorm = 184.0685, GNorm = 0.0548, lr_0 = 1.3705e-04
Loss = 6.7679e-03, PNorm = 184.0696, GNorm = 0.0996, lr_0 = 1.3695e-04
Loss = 2.7604e-03, PNorm = 184.0726, GNorm = 0.0735, lr_0 = 1.3686e-04
Loss = 1.5224e-03, PNorm = 184.0754, GNorm = 0.0716, lr_0 = 1.3677e-04
Loss = 2.2974e-03, PNorm = 184.0762, GNorm = 0.1515, lr_0 = 1.3667e-04
Loss = 1.4269e-03, PNorm = 184.0770, GNorm = 0.1189, lr_0 = 1.3658e-04
Loss = 4.7645e-03, PNorm = 184.0787, GNorm = 0.1116, lr_0 = 1.3649e-04
Loss = 3.3759e-03, PNorm = 184.0809, GNorm = 0.0922, lr_0 = 1.3639e-04
Loss = 1.4978e-03, PNorm = 184.0841, GNorm = 0.0672, lr_0 = 1.3630e-04
Loss = 3.8800e-03, PNorm = 184.0874, GNorm = 0.0974, lr_0 = 1.3621e-04
Loss = 3.1628e-03, PNorm = 184.0903, GNorm = 0.1019, lr_0 = 1.3611e-04
Loss = 2.8062e-03, PNorm = 184.0904, GNorm = 0.1069, lr_0 = 1.3602e-04
Loss = 1.6930e-03, PNorm = 184.0933, GNorm = 0.0880, lr_0 = 1.3593e-04
Loss = 6.0158e-03, PNorm = 184.0965, GNorm = 0.2010, lr_0 = 1.3583e-04
Loss = 1.1083e-02, PNorm = 184.0983, GNorm = 0.0822, lr_0 = 1.3574e-04
Loss = 2.8821e-03, PNorm = 184.1023, GNorm = 0.0936, lr_0 = 1.3565e-04
Loss = 1.7694e-03, PNorm = 184.1041, GNorm = 0.1472, lr_0 = 1.3555e-04
Loss = 6.3826e-03, PNorm = 184.1069, GNorm = 0.0878, lr_0 = 1.3546e-04
Loss = 5.8413e-03, PNorm = 184.1097, GNorm = 0.1464, lr_0 = 1.3537e-04
Loss = 9.5801e-03, PNorm = 184.1093, GNorm = 0.2971, lr_0 = 1.3528e-04
Loss = 3.3954e-03, PNorm = 184.1123, GNorm = 0.2362, lr_0 = 1.3518e-04
Loss = 4.2427e-03, PNorm = 184.1158, GNorm = 0.6212, lr_0 = 1.3509e-04
Loss = 3.8925e-03, PNorm = 184.1181, GNorm = 0.0589, lr_0 = 1.3500e-04
Loss = 2.9213e-03, PNorm = 184.1207, GNorm = 0.1151, lr_0 = 1.3491e-04
Loss = 2.4425e-03, PNorm = 184.1232, GNorm = 0.0791, lr_0 = 1.3481e-04
Loss = 2.8509e-03, PNorm = 184.1245, GNorm = 0.1386, lr_0 = 1.3472e-04
Loss = 1.8686e-03, PNorm = 184.1267, GNorm = 0.1311, lr_0 = 1.3463e-04
Loss = 2.9741e-03, PNorm = 184.1288, GNorm = 0.1304, lr_0 = 1.3454e-04
Loss = 1.5840e-03, PNorm = 184.1311, GNorm = 0.0458, lr_0 = 1.3444e-04
Loss = 2.2622e-03, PNorm = 184.1334, GNorm = 0.1113, lr_0 = 1.3435e-04
Loss = 3.6691e-03, PNorm = 184.1374, GNorm = 0.0900, lr_0 = 1.3426e-04
Loss = 1.7491e-03, PNorm = 184.1404, GNorm = 0.1542, lr_0 = 1.3417e-04
Loss = 5.7556e-03, PNorm = 184.1423, GNorm = 0.1119, lr_0 = 1.3408e-04
Loss = 2.7006e-03, PNorm = 184.1440, GNorm = 0.0774, lr_0 = 1.3398e-04
Loss = 3.2599e-03, PNorm = 184.1453, GNorm = 0.1815, lr_0 = 1.3389e-04
Loss = 3.2828e-03, PNorm = 184.1477, GNorm = 0.0571, lr_0 = 1.3380e-04
Loss = 3.0021e-03, PNorm = 184.1488, GNorm = 0.4002, lr_0 = 1.3371e-04
Loss = 5.6384e-03, PNorm = 184.1507, GNorm = 0.0832, lr_0 = 1.3362e-04
Loss = 5.6301e-03, PNorm = 184.1517, GNorm = 0.1000, lr_0 = 1.3353e-04
Loss = 2.9376e-03, PNorm = 184.1530, GNorm = 0.0532, lr_0 = 1.3343e-04
Loss = 2.1617e-03, PNorm = 184.1550, GNorm = 0.1086, lr_0 = 1.3334e-04
Loss = 2.0144e-03, PNorm = 184.1579, GNorm = 0.1643, lr_0 = 1.3325e-04
Loss = 1.8537e-03, PNorm = 184.1596, GNorm = 0.0933, lr_0 = 1.3316e-04
Loss = 1.2032e-03, PNorm = 184.1611, GNorm = 0.0728, lr_0 = 1.3307e-04
Loss = 3.7818e-03, PNorm = 184.1629, GNorm = 0.1245, lr_0 = 1.3298e-04
Loss = 2.8455e-03, PNorm = 184.1655, GNorm = 0.1008, lr_0 = 1.3289e-04
Loss = 4.2554e-03, PNorm = 184.1689, GNorm = 0.0608, lr_0 = 1.3280e-04
Loss = 4.0323e-03, PNorm = 184.1710, GNorm = 0.1349, lr_0 = 1.3270e-04
Loss = 2.0731e-03, PNorm = 184.1726, GNorm = 0.0928, lr_0 = 1.3261e-04
Loss = 1.7780e-03, PNorm = 184.1745, GNorm = 0.1154, lr_0 = 1.3252e-04
Loss = 6.9204e-03, PNorm = 184.1772, GNorm = 0.2266, lr_0 = 1.3243e-04
Loss = 2.7942e-03, PNorm = 184.1789, GNorm = 0.1041, lr_0 = 1.3234e-04
Loss = 2.5760e-03, PNorm = 184.1804, GNorm = 0.1023, lr_0 = 1.3225e-04
Loss = 3.0781e-03, PNorm = 184.1823, GNorm = 0.0874, lr_0 = 1.3216e-04
Loss = 4.5016e-03, PNorm = 184.1831, GNorm = 0.0562, lr_0 = 1.3207e-04
Loss = 2.3157e-03, PNorm = 184.1834, GNorm = 0.0829, lr_0 = 1.3198e-04
Loss = 5.8112e-03, PNorm = 184.1847, GNorm = 0.3573, lr_0 = 1.3189e-04
Loss = 2.2743e-03, PNorm = 184.1874, GNorm = 0.1496, lr_0 = 1.3180e-04
Loss = 2.8534e-03, PNorm = 184.1898, GNorm = 0.0827, lr_0 = 1.3171e-04
Loss = 1.4500e-03, PNorm = 184.1910, GNorm = 0.0424, lr_0 = 1.3162e-04
Loss = 7.9688e-03, PNorm = 184.1925, GNorm = 0.0862, lr_0 = 1.3153e-04
Loss = 7.6840e-03, PNorm = 184.1939, GNorm = 0.0624, lr_0 = 1.3144e-04
Loss = 4.8439e-03, PNorm = 184.1966, GNorm = 0.1492, lr_0 = 1.3135e-04
Loss = 1.5029e-03, PNorm = 184.1985, GNorm = 0.0986, lr_0 = 1.3126e-04
Loss = 2.5551e-03, PNorm = 184.2009, GNorm = 0.0846, lr_0 = 1.3117e-04
Loss = 3.1385e-03, PNorm = 184.2032, GNorm = 0.0731, lr_0 = 1.3108e-04
Loss = 2.9065e-03, PNorm = 184.2051, GNorm = 0.0821, lr_0 = 1.3099e-04
Loss = 6.5310e-03, PNorm = 184.2067, GNorm = 0.2673, lr_0 = 1.3090e-04
Loss = 2.1449e-03, PNorm = 184.2085, GNorm = 0.1235, lr_0 = 1.3081e-04
Loss = 2.9241e-03, PNorm = 184.2111, GNorm = 0.0543, lr_0 = 1.3072e-04
Loss = 6.3245e-03, PNorm = 184.2135, GNorm = 0.1817, lr_0 = 1.3063e-04
Loss = 2.3456e-03, PNorm = 184.2143, GNorm = 0.1867, lr_0 = 1.3054e-04
Loss = 4.0585e-03, PNorm = 184.2171, GNorm = 0.1116, lr_0 = 1.3045e-04
Loss = 7.6082e-03, PNorm = 184.2198, GNorm = 0.1353, lr_0 = 1.3036e-04
Loss = 2.9538e-03, PNorm = 184.2225, GNorm = 0.2369, lr_0 = 1.3027e-04
Loss = 3.8657e-03, PNorm = 184.2250, GNorm = 0.0778, lr_0 = 1.3018e-04
Loss = 2.7387e-03, PNorm = 184.2270, GNorm = 0.1408, lr_0 = 1.3009e-04
Loss = 2.4943e-03, PNorm = 184.2291, GNorm = 0.2113, lr_0 = 1.3000e-04
Loss = 3.2754e-03, PNorm = 184.2313, GNorm = 0.1188, lr_0 = 1.2992e-04
Loss = 5.8401e-03, PNorm = 184.2338, GNorm = 0.3193, lr_0 = 1.2983e-04
Loss = 5.1515e-03, PNorm = 184.2370, GNorm = 0.1488, lr_0 = 1.2974e-04
Loss = 2.3784e-03, PNorm = 184.2397, GNorm = 0.0616, lr_0 = 1.2965e-04
Loss = 1.9857e-03, PNorm = 184.2411, GNorm = 0.1013, lr_0 = 1.2956e-04
Loss = 1.5255e-03, PNorm = 184.2424, GNorm = 0.0723, lr_0 = 1.2947e-04
Loss = 2.0034e-03, PNorm = 184.2441, GNorm = 0.0765, lr_0 = 1.2938e-04
Loss = 6.2195e-03, PNorm = 184.2452, GNorm = 0.0856, lr_0 = 1.2929e-04
Loss = 3.9387e-03, PNorm = 184.2469, GNorm = 0.0484, lr_0 = 1.2921e-04
Loss = 3.5133e-03, PNorm = 184.2478, GNorm = 0.0998, lr_0 = 1.2912e-04
Loss = 2.7313e-03, PNorm = 184.2495, GNorm = 0.2250, lr_0 = 1.2903e-04
Loss = 3.2115e-03, PNorm = 184.2530, GNorm = 0.0664, lr_0 = 1.2894e-04
Loss = 1.8529e-03, PNorm = 184.2551, GNorm = 0.4930, lr_0 = 1.2885e-04
Loss = 5.1637e-03, PNorm = 184.2563, GNorm = 0.0845, lr_0 = 1.2876e-04
Loss = 2.1150e-03, PNorm = 184.2584, GNorm = 0.1303, lr_0 = 1.2867e-04
Loss = 2.8176e-03, PNorm = 184.2598, GNorm = 0.0743, lr_0 = 1.2859e-04
Loss = 7.3673e-03, PNorm = 184.2621, GNorm = 0.0677, lr_0 = 1.2850e-04
Loss = 5.0549e-03, PNorm = 184.2650, GNorm = 0.0634, lr_0 = 1.2841e-04
Loss = 5.9149e-03, PNorm = 184.2675, GNorm = 0.1418, lr_0 = 1.2832e-04
Loss = 1.4514e-03, PNorm = 184.2689, GNorm = 0.1091, lr_0 = 1.2823e-04
Loss = 5.3259e-03, PNorm = 184.2705, GNorm = 0.2824, lr_0 = 1.2815e-04
Loss = 4.5449e-03, PNorm = 184.2732, GNorm = 0.3236, lr_0 = 1.2806e-04
Loss = 1.6353e-03, PNorm = 184.2727, GNorm = 0.2591, lr_0 = 1.2797e-04
Validation mae = 0.120560
Epoch 27
Loss = 1.8693e-03, PNorm = 184.2736, GNorm = 0.0648, lr_0 = 1.2788e-04
Loss = 4.4234e-03, PNorm = 184.2735, GNorm = 0.0765, lr_0 = 1.2780e-04
Loss = 1.9904e-03, PNorm = 184.2728, GNorm = 0.0632, lr_0 = 1.2771e-04
Loss = 4.1933e-03, PNorm = 184.2738, GNorm = 0.0981, lr_0 = 1.2762e-04
Loss = 3.3497e-03, PNorm = 184.2746, GNorm = 0.1003, lr_0 = 1.2753e-04
Loss = 4.9881e-03, PNorm = 184.2766, GNorm = 0.1656, lr_0 = 1.2745e-04
Loss = 1.7098e-03, PNorm = 184.2761, GNorm = 0.0833, lr_0 = 1.2736e-04
Loss = 1.7774e-03, PNorm = 184.2773, GNorm = 0.1219, lr_0 = 1.2727e-04
Loss = 2.1902e-03, PNorm = 184.2784, GNorm = 0.1695, lr_0 = 1.2718e-04
Loss = 2.2573e-03, PNorm = 184.2789, GNorm = 0.0644, lr_0 = 1.2710e-04
Loss = 1.7328e-03, PNorm = 184.2822, GNorm = 0.0946, lr_0 = 1.2701e-04
Loss = 1.9105e-03, PNorm = 184.2845, GNorm = 0.0880, lr_0 = 1.2692e-04
Loss = 1.7254e-03, PNorm = 184.2855, GNorm = 0.1343, lr_0 = 1.2684e-04
Loss = 2.8079e-03, PNorm = 184.2866, GNorm = 0.2548, lr_0 = 1.2675e-04
Loss = 2.0373e-03, PNorm = 184.2882, GNorm = 0.0630, lr_0 = 1.2666e-04
Loss = 1.4481e-03, PNorm = 184.2905, GNorm = 0.0690, lr_0 = 1.2658e-04
Loss = 3.9635e-03, PNorm = 184.2914, GNorm = 0.1193, lr_0 = 1.2649e-04
Loss = 1.4310e-03, PNorm = 184.2929, GNorm = 0.1160, lr_0 = 1.2640e-04
Loss = 2.4885e-03, PNorm = 184.2945, GNorm = 0.3441, lr_0 = 1.2632e-04
Loss = 6.8206e-03, PNorm = 184.2964, GNorm = 0.4399, lr_0 = 1.2623e-04
Loss = 1.1234e-02, PNorm = 184.2994, GNorm = 2.6424, lr_0 = 1.2614e-04
Loss = 2.4041e-03, PNorm = 184.3028, GNorm = 0.1408, lr_0 = 1.2606e-04
Loss = 3.1677e-03, PNorm = 184.3041, GNorm = 0.1265, lr_0 = 1.2597e-04
Loss = 1.7321e-03, PNorm = 184.3058, GNorm = 0.1737, lr_0 = 1.2588e-04
Loss = 1.6111e-03, PNorm = 184.3079, GNorm = 0.1600, lr_0 = 1.2580e-04
Loss = 1.8536e-03, PNorm = 184.3087, GNorm = 0.0675, lr_0 = 1.2571e-04
Loss = 2.8063e-03, PNorm = 184.3102, GNorm = 0.0848, lr_0 = 1.2563e-04
Loss = 7.3139e-03, PNorm = 184.3091, GNorm = 0.2030, lr_0 = 1.2554e-04
Loss = 3.7489e-03, PNorm = 184.3103, GNorm = 0.1418, lr_0 = 1.2545e-04
Loss = 3.4463e-03, PNorm = 184.3126, GNorm = 0.0654, lr_0 = 1.2537e-04
Loss = 4.4167e-03, PNorm = 184.3136, GNorm = 0.1130, lr_0 = 1.2528e-04
Loss = 2.1120e-03, PNorm = 184.3158, GNorm = 0.0671, lr_0 = 1.2520e-04
Loss = 3.5042e-03, PNorm = 184.3175, GNorm = 0.0785, lr_0 = 1.2511e-04
Loss = 2.5591e-03, PNorm = 184.3180, GNorm = 0.0675, lr_0 = 1.2502e-04
Loss = 4.1266e-03, PNorm = 184.3197, GNorm = 0.1017, lr_0 = 1.2494e-04
Loss = 5.7267e-03, PNorm = 184.3211, GNorm = 0.1031, lr_0 = 1.2485e-04
Loss = 4.8551e-03, PNorm = 184.3223, GNorm = 0.0983, lr_0 = 1.2477e-04
Loss = 4.1172e-03, PNorm = 184.3239, GNorm = 0.2012, lr_0 = 1.2468e-04
Loss = 5.6882e-03, PNorm = 184.3251, GNorm = 0.1202, lr_0 = 1.2460e-04
Loss = 1.2251e-03, PNorm = 184.3270, GNorm = 0.0918, lr_0 = 1.2451e-04
Loss = 1.4013e-03, PNorm = 184.3304, GNorm = 0.0373, lr_0 = 1.2443e-04
Loss = 2.3578e-03, PNorm = 184.3334, GNorm = 0.0542, lr_0 = 1.2434e-04
Loss = 6.7477e-03, PNorm = 184.3356, GNorm = 0.0775, lr_0 = 1.2426e-04
Loss = 3.7953e-03, PNorm = 184.3371, GNorm = 0.0869, lr_0 = 1.2417e-04
Loss = 1.8811e-03, PNorm = 184.3387, GNorm = 0.0623, lr_0 = 1.2409e-04
Loss = 5.8626e-03, PNorm = 184.3398, GNorm = 0.1274, lr_0 = 1.2400e-04
Loss = 1.5461e-03, PNorm = 184.3411, GNorm = 0.1081, lr_0 = 1.2392e-04
Loss = 2.6970e-03, PNorm = 184.3424, GNorm = 0.0723, lr_0 = 1.2383e-04
Loss = 3.6972e-03, PNorm = 184.3438, GNorm = 0.0670, lr_0 = 1.2375e-04
Loss = 4.2755e-03, PNorm = 184.3452, GNorm = 0.4562, lr_0 = 1.2366e-04
Loss = 2.5207e-03, PNorm = 184.3455, GNorm = 0.1640, lr_0 = 1.2358e-04
Loss = 5.3506e-03, PNorm = 184.3465, GNorm = 0.1017, lr_0 = 1.2349e-04
Loss = 1.6126e-03, PNorm = 184.3477, GNorm = 0.2412, lr_0 = 1.2341e-04
Loss = 3.5101e-03, PNorm = 184.3479, GNorm = 0.0874, lr_0 = 1.2332e-04
Loss = 7.0768e-03, PNorm = 184.3504, GNorm = 0.1715, lr_0 = 1.2324e-04
Loss = 6.3948e-03, PNorm = 184.3530, GNorm = 0.0376, lr_0 = 1.2315e-04
Loss = 1.8739e-03, PNorm = 184.3552, GNorm = 0.0498, lr_0 = 1.2307e-04
Loss = 3.6380e-03, PNorm = 184.3565, GNorm = 0.0866, lr_0 = 1.2298e-04
Loss = 6.3349e-03, PNorm = 184.3564, GNorm = 0.1025, lr_0 = 1.2290e-04
Loss = 3.1330e-03, PNorm = 184.3588, GNorm = 0.1170, lr_0 = 1.2282e-04
Loss = 2.3942e-03, PNorm = 184.3609, GNorm = 0.1112, lr_0 = 1.2273e-04
Loss = 1.2400e-03, PNorm = 184.3615, GNorm = 0.0461, lr_0 = 1.2265e-04
Loss = 2.1030e-03, PNorm = 184.3630, GNorm = 0.1173, lr_0 = 1.2256e-04
Loss = 1.0657e-02, PNorm = 184.3650, GNorm = 0.1421, lr_0 = 1.2248e-04
Loss = 5.8686e-03, PNorm = 184.3653, GNorm = 0.1135, lr_0 = 1.2240e-04
Loss = 1.2952e-03, PNorm = 184.3674, GNorm = 0.0501, lr_0 = 1.2231e-04
Loss = 3.4523e-03, PNorm = 184.3693, GNorm = 0.0921, lr_0 = 1.2223e-04
Loss = 4.6625e-03, PNorm = 184.3713, GNorm = 0.0990, lr_0 = 1.2214e-04
Loss = 1.8315e-03, PNorm = 184.3736, GNorm = 0.0991, lr_0 = 1.2206e-04
Loss = 3.7489e-03, PNorm = 184.3761, GNorm = 0.0890, lr_0 = 1.2198e-04
Loss = 1.9316e-03, PNorm = 184.3779, GNorm = 0.0784, lr_0 = 1.2189e-04
Loss = 3.0281e-03, PNorm = 184.3803, GNorm = 0.1003, lr_0 = 1.2181e-04
Loss = 6.7024e-03, PNorm = 184.3820, GNorm = 0.0815, lr_0 = 1.2173e-04
Loss = 1.8559e-03, PNorm = 184.3840, GNorm = 0.1463, lr_0 = 1.2164e-04
Loss = 1.6758e-03, PNorm = 184.3857, GNorm = 0.1207, lr_0 = 1.2156e-04
Loss = 1.9243e-03, PNorm = 184.3870, GNorm = 0.1190, lr_0 = 1.2148e-04
Loss = 1.2458e-03, PNorm = 184.3898, GNorm = 0.0978, lr_0 = 1.2139e-04
Loss = 1.2222e-03, PNorm = 184.3910, GNorm = 0.0536, lr_0 = 1.2131e-04
Loss = 2.1854e-03, PNorm = 184.3923, GNorm = 0.1240, lr_0 = 1.2123e-04
Loss = 1.5888e-03, PNorm = 184.3947, GNorm = 0.0657, lr_0 = 1.2114e-04
Loss = 2.6502e-03, PNorm = 184.3972, GNorm = 0.0711, lr_0 = 1.2106e-04
Loss = 6.0984e-03, PNorm = 184.3996, GNorm = 0.1052, lr_0 = 1.2098e-04
Loss = 3.7986e-03, PNorm = 184.4012, GNorm = 0.1436, lr_0 = 1.2090e-04
Loss = 2.3498e-03, PNorm = 184.4033, GNorm = 0.0405, lr_0 = 1.2081e-04
Loss = 4.1042e-03, PNorm = 184.4052, GNorm = 0.2201, lr_0 = 1.2073e-04
Loss = 5.8560e-03, PNorm = 184.4076, GNorm = 0.3258, lr_0 = 1.2065e-04
Loss = 1.6129e-03, PNorm = 184.4087, GNorm = 0.0915, lr_0 = 1.2056e-04
Loss = 6.0769e-03, PNorm = 184.4106, GNorm = 0.1999, lr_0 = 1.2048e-04
Loss = 3.1365e-03, PNorm = 184.4103, GNorm = 0.1025, lr_0 = 1.2040e-04
Loss = 3.5506e-03, PNorm = 184.4115, GNorm = 0.0736, lr_0 = 1.2032e-04
Loss = 1.9780e-03, PNorm = 184.4136, GNorm = 0.1304, lr_0 = 1.2023e-04
Loss = 1.4961e-03, PNorm = 184.4159, GNorm = 0.0843, lr_0 = 1.2015e-04
Loss = 1.5945e-03, PNorm = 184.4178, GNorm = 0.0777, lr_0 = 1.2007e-04
Loss = 2.3285e-03, PNorm = 184.4191, GNorm = 0.1535, lr_0 = 1.1999e-04
Loss = 1.8244e-03, PNorm = 184.4204, GNorm = 0.1142, lr_0 = 1.1991e-04
Loss = 3.1725e-03, PNorm = 184.4224, GNorm = 0.1096, lr_0 = 1.1982e-04
Loss = 7.3698e-03, PNorm = 184.4234, GNorm = 0.2907, lr_0 = 1.1974e-04
Loss = 1.5034e-03, PNorm = 184.4246, GNorm = 0.0710, lr_0 = 1.1966e-04
Loss = 2.0647e-03, PNorm = 184.4256, GNorm = 0.0676, lr_0 = 1.1958e-04
Loss = 1.3637e-03, PNorm = 184.4271, GNorm = 0.0524, lr_0 = 1.1950e-04
Loss = 3.2050e-03, PNorm = 184.4290, GNorm = 0.1022, lr_0 = 1.1941e-04
Loss = 1.4049e-03, PNorm = 184.4319, GNorm = 0.0409, lr_0 = 1.1933e-04
Loss = 1.7138e-03, PNorm = 184.4346, GNorm = 0.1746, lr_0 = 1.1925e-04
Loss = 6.5602e-03, PNorm = 184.4358, GNorm = 0.1044, lr_0 = 1.1917e-04
Loss = 1.5219e-03, PNorm = 184.4379, GNorm = 0.1316, lr_0 = 1.1909e-04
Loss = 2.1991e-03, PNorm = 184.4404, GNorm = 0.1720, lr_0 = 1.1901e-04
Loss = 4.9916e-03, PNorm = 184.4415, GNorm = 1.2344, lr_0 = 1.1892e-04
Loss = 4.6387e-03, PNorm = 184.4431, GNorm = 0.2041, lr_0 = 1.1884e-04
Loss = 1.2677e-03, PNorm = 184.4458, GNorm = 0.0734, lr_0 = 1.1876e-04
Loss = 1.6873e-03, PNorm = 184.4472, GNorm = 0.0691, lr_0 = 1.1868e-04
Loss = 3.0616e-03, PNorm = 184.4494, GNorm = 0.0624, lr_0 = 1.1860e-04
Loss = 2.8644e-03, PNorm = 184.4512, GNorm = 0.0836, lr_0 = 1.1852e-04
Loss = 2.2815e-03, PNorm = 184.4527, GNorm = 0.0988, lr_0 = 1.1844e-04
Loss = 2.0178e-03, PNorm = 184.4557, GNorm = 0.0442, lr_0 = 1.1835e-04
Loss = 2.6029e-03, PNorm = 184.4591, GNorm = 0.0621, lr_0 = 1.1827e-04
Loss = 1.4059e-03, PNorm = 184.4621, GNorm = 0.0492, lr_0 = 1.1819e-04
Loss = 4.7161e-03, PNorm = 184.4643, GNorm = 0.0606, lr_0 = 1.1811e-04
Loss = 2.1916e-03, PNorm = 184.4660, GNorm = 0.0465, lr_0 = 1.1803e-04
Loss = 3.7344e-03, PNorm = 184.4666, GNorm = 0.0620, lr_0 = 1.1795e-04
Loss = 5.0230e-03, PNorm = 184.4674, GNorm = 0.5028, lr_0 = 1.1787e-04
Validation mae = 0.120650
Epoch 28
Loss = 3.2954e-03, PNorm = 184.4684, GNorm = 0.0847, lr_0 = 1.1779e-04
Loss = 6.5666e-03, PNorm = 184.4700, GNorm = 0.1182, lr_0 = 1.1771e-04
Loss = 2.4591e-03, PNorm = 184.4711, GNorm = 0.0707, lr_0 = 1.1763e-04
Loss = 3.6656e-03, PNorm = 184.4723, GNorm = 0.1187, lr_0 = 1.1755e-04
Loss = 1.6270e-03, PNorm = 184.4739, GNorm = 0.0603, lr_0 = 1.1747e-04
Loss = 1.2350e-03, PNorm = 184.4753, GNorm = 0.0809, lr_0 = 1.1739e-04
Loss = 1.0833e-03, PNorm = 184.4759, GNorm = 0.0565, lr_0 = 1.1730e-04
Loss = 2.2125e-03, PNorm = 184.4768, GNorm = 0.1095, lr_0 = 1.1722e-04
Loss = 1.4331e-03, PNorm = 184.4777, GNorm = 0.0354, lr_0 = 1.1714e-04
Loss = 1.5768e-03, PNorm = 184.4790, GNorm = 0.1621, lr_0 = 1.1706e-04
Loss = 3.2776e-03, PNorm = 184.4809, GNorm = 0.0861, lr_0 = 1.1698e-04
Loss = 4.0873e-03, PNorm = 184.4811, GNorm = 0.0678, lr_0 = 1.1690e-04
Loss = 4.8301e-03, PNorm = 184.4814, GNorm = 0.0997, lr_0 = 1.1682e-04
Loss = 1.4803e-03, PNorm = 184.4831, GNorm = 0.0562, lr_0 = 1.1674e-04
Loss = 1.7029e-03, PNorm = 184.4844, GNorm = 0.0702, lr_0 = 1.1666e-04
Loss = 3.4592e-03, PNorm = 184.4863, GNorm = 0.0833, lr_0 = 1.1658e-04
Loss = 2.4302e-03, PNorm = 184.4880, GNorm = 0.2024, lr_0 = 1.1650e-04
Loss = 2.9338e-03, PNorm = 184.4885, GNorm = 0.2137, lr_0 = 1.1642e-04
Loss = 1.9749e-03, PNorm = 184.4891, GNorm = 0.0996, lr_0 = 1.1634e-04
Loss = 8.8167e-04, PNorm = 184.4904, GNorm = 0.0643, lr_0 = 1.1626e-04
Loss = 1.1592e-03, PNorm = 184.4913, GNorm = 0.0438, lr_0 = 1.1618e-04
Loss = 2.8203e-03, PNorm = 184.4919, GNorm = 0.0949, lr_0 = 1.1611e-04
Loss = 3.5999e-03, PNorm = 184.4926, GNorm = 0.0473, lr_0 = 1.1603e-04
Loss = 1.6903e-03, PNorm = 184.4935, GNorm = 0.1092, lr_0 = 1.1595e-04
Loss = 5.1782e-03, PNorm = 184.4932, GNorm = 0.0862, lr_0 = 1.1587e-04
Loss = 2.7818e-03, PNorm = 184.4941, GNorm = 0.0395, lr_0 = 1.1579e-04
Loss = 1.5470e-03, PNorm = 184.4964, GNorm = 0.1092, lr_0 = 1.1571e-04
Loss = 4.0611e-03, PNorm = 184.4962, GNorm = 0.2618, lr_0 = 1.1563e-04
Loss = 1.1056e-03, PNorm = 184.4967, GNorm = 0.0966, lr_0 = 1.1555e-04
Loss = 2.9575e-03, PNorm = 184.4986, GNorm = 0.0516, lr_0 = 1.1547e-04
Loss = 2.0980e-03, PNorm = 184.5002, GNorm = 0.0814, lr_0 = 1.1539e-04
Loss = 5.2963e-03, PNorm = 184.5013, GNorm = 0.1555, lr_0 = 1.1531e-04
Loss = 6.2239e-03, PNorm = 184.5022, GNorm = 0.1953, lr_0 = 1.1523e-04
Loss = 1.6005e-03, PNorm = 184.5029, GNorm = 0.1273, lr_0 = 1.1515e-04
Loss = 1.2710e-03, PNorm = 184.5040, GNorm = 0.0654, lr_0 = 1.1508e-04
Loss = 1.1285e-03, PNorm = 184.5055, GNorm = 0.1053, lr_0 = 1.1500e-04
Loss = 1.0463e-03, PNorm = 184.5063, GNorm = 0.1295, lr_0 = 1.1492e-04
Loss = 2.9224e-03, PNorm = 184.5082, GNorm = 0.0766, lr_0 = 1.1484e-04
Loss = 1.9334e-03, PNorm = 184.5100, GNorm = 0.2609, lr_0 = 1.1476e-04
Loss = 1.2841e-03, PNorm = 184.5108, GNorm = 0.1486, lr_0 = 1.1468e-04
Loss = 2.3892e-03, PNorm = 184.5115, GNorm = 0.0778, lr_0 = 1.1460e-04
Loss = 9.9090e-04, PNorm = 184.5135, GNorm = 0.0958, lr_0 = 1.1452e-04
Loss = 3.6491e-03, PNorm = 184.5157, GNorm = 0.1141, lr_0 = 1.1445e-04
Loss = 1.1892e-03, PNorm = 184.5168, GNorm = 0.1328, lr_0 = 1.1437e-04
Loss = 1.2888e-03, PNorm = 184.5183, GNorm = 0.0866, lr_0 = 1.1429e-04
Loss = 8.1577e-03, PNorm = 184.5199, GNorm = 0.1626, lr_0 = 1.1421e-04
Loss = 1.9822e-03, PNorm = 184.5201, GNorm = 0.1438, lr_0 = 1.1413e-04
Loss = 2.1704e-03, PNorm = 184.5208, GNorm = 0.0553, lr_0 = 1.1405e-04
Loss = 1.9718e-03, PNorm = 184.5219, GNorm = 0.0947, lr_0 = 1.1398e-04
Loss = 4.8804e-03, PNorm = 184.5235, GNorm = 0.1363, lr_0 = 1.1390e-04
Loss = 1.2944e-02, PNorm = 184.5270, GNorm = 0.0894, lr_0 = 1.1382e-04
Loss = 4.1098e-03, PNorm = 184.5294, GNorm = 0.2265, lr_0 = 1.1374e-04
Loss = 4.0506e-03, PNorm = 184.5303, GNorm = 0.0580, lr_0 = 1.1366e-04
Loss = 2.4823e-03, PNorm = 184.5319, GNorm = 0.1093, lr_0 = 1.1359e-04
Loss = 3.4544e-03, PNorm = 184.5330, GNorm = 0.1431, lr_0 = 1.1351e-04
Loss = 1.8145e-03, PNorm = 184.5347, GNorm = 0.0675, lr_0 = 1.1343e-04
Loss = 1.3032e-03, PNorm = 184.5351, GNorm = 0.1425, lr_0 = 1.1335e-04
Loss = 2.2251e-03, PNorm = 184.5363, GNorm = 0.0373, lr_0 = 1.1328e-04
Loss = 4.0290e-03, PNorm = 184.5385, GNorm = 0.1175, lr_0 = 1.1320e-04
Loss = 2.9162e-03, PNorm = 184.5380, GNorm = 0.1418, lr_0 = 1.1312e-04
Loss = 3.3590e-03, PNorm = 184.5408, GNorm = 0.0631, lr_0 = 1.1304e-04
Loss = 2.8623e-03, PNorm = 184.5425, GNorm = 0.0893, lr_0 = 1.1297e-04
Loss = 3.9487e-03, PNorm = 184.5449, GNorm = 0.0382, lr_0 = 1.1289e-04
Loss = 4.5444e-03, PNorm = 184.5468, GNorm = 0.1210, lr_0 = 1.1281e-04
Loss = 5.3823e-03, PNorm = 184.5486, GNorm = 0.0880, lr_0 = 1.1273e-04
Loss = 5.3295e-03, PNorm = 184.5510, GNorm = 0.2598, lr_0 = 1.1266e-04
Loss = 2.5699e-03, PNorm = 184.5538, GNorm = 0.1234, lr_0 = 1.1258e-04
Loss = 3.7284e-03, PNorm = 184.5556, GNorm = 0.0946, lr_0 = 1.1250e-04
Loss = 2.6599e-03, PNorm = 184.5563, GNorm = 0.0728, lr_0 = 1.1243e-04
Loss = 6.0565e-03, PNorm = 184.5574, GNorm = 0.1342, lr_0 = 1.1235e-04
Loss = 4.4982e-03, PNorm = 184.5603, GNorm = 0.0681, lr_0 = 1.1227e-04
Loss = 1.4507e-03, PNorm = 184.5618, GNorm = 0.1813, lr_0 = 1.1219e-04
Loss = 3.8286e-03, PNorm = 184.5638, GNorm = 0.1265, lr_0 = 1.1212e-04
Loss = 1.3482e-03, PNorm = 184.5659, GNorm = 0.1261, lr_0 = 1.1204e-04
Loss = 1.9116e-03, PNorm = 184.5673, GNorm = 0.0660, lr_0 = 1.1196e-04
Loss = 2.4415e-03, PNorm = 184.5687, GNorm = 0.1115, lr_0 = 1.1189e-04
Loss = 5.7422e-03, PNorm = 184.5704, GNorm = 0.0683, lr_0 = 1.1181e-04
Loss = 1.0823e-03, PNorm = 184.5709, GNorm = 0.0761, lr_0 = 1.1173e-04
Loss = 2.4743e-03, PNorm = 184.5726, GNorm = 0.2232, lr_0 = 1.1166e-04
Loss = 2.3924e-03, PNorm = 184.5744, GNorm = 0.0632, lr_0 = 1.1158e-04
Loss = 7.5185e-03, PNorm = 184.5761, GNorm = 0.0389, lr_0 = 1.1150e-04
Loss = 3.5743e-03, PNorm = 184.5760, GNorm = 0.2328, lr_0 = 1.1143e-04
Loss = 3.2937e-03, PNorm = 184.5769, GNorm = 0.0896, lr_0 = 1.1135e-04
Loss = 5.7710e-03, PNorm = 184.5776, GNorm = 0.0507, lr_0 = 1.1128e-04
Loss = 1.6508e-03, PNorm = 184.5792, GNorm = 0.1179, lr_0 = 1.1120e-04
Loss = 1.7262e-03, PNorm = 184.5813, GNorm = 0.1531, lr_0 = 1.1112e-04
Loss = 2.3208e-03, PNorm = 184.5834, GNorm = 0.0931, lr_0 = 1.1105e-04
Loss = 8.0777e-03, PNorm = 184.5859, GNorm = 0.0738, lr_0 = 1.1097e-04
Loss = 5.4513e-03, PNorm = 184.5872, GNorm = 0.1456, lr_0 = 1.1089e-04
Loss = 1.8294e-03, PNorm = 184.5874, GNorm = 0.0715, lr_0 = 1.1082e-04
Loss = 3.4145e-03, PNorm = 184.5873, GNorm = 0.0617, lr_0 = 1.1074e-04
Loss = 1.2035e-03, PNorm = 184.5886, GNorm = 0.0457, lr_0 = 1.1067e-04
Loss = 7.6401e-03, PNorm = 184.5903, GNorm = 0.0573, lr_0 = 1.1059e-04
Loss = 1.9905e-03, PNorm = 184.5909, GNorm = 0.0952, lr_0 = 1.1052e-04
Loss = 1.3364e-03, PNorm = 184.5923, GNorm = 0.0761, lr_0 = 1.1044e-04
Loss = 1.7431e-03, PNorm = 184.5942, GNorm = 0.0386, lr_0 = 1.1036e-04
Loss = 4.9617e-03, PNorm = 184.5957, GNorm = 0.0659, lr_0 = 1.1029e-04
Loss = 1.7030e-03, PNorm = 184.5974, GNorm = 0.1654, lr_0 = 1.1021e-04
Loss = 4.8035e-03, PNorm = 184.5988, GNorm = 0.0837, lr_0 = 1.1014e-04
Loss = 2.4359e-03, PNorm = 184.5992, GNorm = 0.1194, lr_0 = 1.1006e-04
Loss = 4.0226e-03, PNorm = 184.5989, GNorm = 0.1073, lr_0 = 1.0999e-04
Loss = 1.9384e-03, PNorm = 184.5995, GNorm = 0.1339, lr_0 = 1.0991e-04
Loss = 5.5057e-03, PNorm = 184.6002, GNorm = 0.0395, lr_0 = 1.0984e-04
Loss = 3.1593e-03, PNorm = 184.6012, GNorm = 0.0527, lr_0 = 1.0976e-04
Loss = 4.3766e-03, PNorm = 184.6025, GNorm = 0.0509, lr_0 = 1.0969e-04
Loss = 2.9617e-03, PNorm = 184.6035, GNorm = 0.1379, lr_0 = 1.0961e-04
Loss = 1.8864e-03, PNorm = 184.6051, GNorm = 0.0615, lr_0 = 1.0954e-04
Loss = 2.7864e-03, PNorm = 184.6064, GNorm = 0.0665, lr_0 = 1.0946e-04
Loss = 1.1447e-03, PNorm = 184.6068, GNorm = 0.0648, lr_0 = 1.0939e-04
Loss = 1.7329e-03, PNorm = 184.6086, GNorm = 0.1524, lr_0 = 1.0931e-04
Loss = 1.4923e-03, PNorm = 184.6106, GNorm = 0.0925, lr_0 = 1.0924e-04
Loss = 9.8553e-04, PNorm = 184.6118, GNorm = 0.0948, lr_0 = 1.0916e-04
Loss = 5.4405e-03, PNorm = 184.6131, GNorm = 0.0978, lr_0 = 1.0909e-04
Loss = 2.0859e-03, PNorm = 184.6143, GNorm = 0.1646, lr_0 = 1.0901e-04
Loss = 2.3686e-03, PNorm = 184.6163, GNorm = 0.1143, lr_0 = 1.0894e-04
Loss = 1.7744e-03, PNorm = 184.6187, GNorm = 0.1805, lr_0 = 1.0886e-04
Loss = 3.7129e-03, PNorm = 184.6206, GNorm = 0.0943, lr_0 = 1.0879e-04
Loss = 2.4365e-03, PNorm = 184.6219, GNorm = 0.0587, lr_0 = 1.0871e-04
Loss = 3.2998e-03, PNorm = 184.6235, GNorm = 0.1530, lr_0 = 1.0864e-04
Loss = 1.0344e-03, PNorm = 184.6239, GNorm = 0.0850, lr_0 = 1.0856e-04
Validation mae = 0.120543
Epoch 29
Loss = 6.6619e-03, PNorm = 184.6260, GNorm = 0.0602, lr_0 = 1.0849e-04
Loss = 2.1135e-03, PNorm = 184.6280, GNorm = 0.1339, lr_0 = 1.0841e-04
Loss = 2.0352e-03, PNorm = 184.6287, GNorm = 0.0757, lr_0 = 1.0834e-04
Loss = 2.9310e-03, PNorm = 184.6297, GNorm = 0.0778, lr_0 = 1.0827e-04
Loss = 5.4883e-03, PNorm = 184.6285, GNorm = 0.1264, lr_0 = 1.0819e-04
Loss = 1.1391e-03, PNorm = 184.6285, GNorm = 0.1143, lr_0 = 1.0812e-04
Loss = 4.8214e-03, PNorm = 184.6300, GNorm = 0.0654, lr_0 = 1.0804e-04
Loss = 1.2189e-03, PNorm = 184.6304, GNorm = 0.1740, lr_0 = 1.0797e-04
Loss = 2.9806e-03, PNorm = 184.6309, GNorm = 0.1094, lr_0 = 1.0790e-04
Loss = 1.8940e-03, PNorm = 184.6322, GNorm = 0.0947, lr_0 = 1.0782e-04
Loss = 2.5627e-03, PNorm = 184.6339, GNorm = 0.2883, lr_0 = 1.0775e-04
Loss = 1.4180e-03, PNorm = 184.6347, GNorm = 0.2737, lr_0 = 1.0767e-04
Loss = 2.2388e-03, PNorm = 184.6361, GNorm = 0.0790, lr_0 = 1.0760e-04
Loss = 3.7785e-03, PNorm = 184.6375, GNorm = 0.0482, lr_0 = 1.0753e-04
Loss = 2.8585e-03, PNorm = 184.6384, GNorm = 0.2614, lr_0 = 1.0745e-04
Loss = 1.1891e-03, PNorm = 184.6399, GNorm = 0.1390, lr_0 = 1.0738e-04
Loss = 2.0828e-03, PNorm = 184.6414, GNorm = 0.1359, lr_0 = 1.0731e-04
Loss = 1.5970e-03, PNorm = 184.6438, GNorm = 0.1094, lr_0 = 1.0723e-04
Loss = 8.4496e-03, PNorm = 184.6452, GNorm = 0.2363, lr_0 = 1.0716e-04
Loss = 5.2062e-03, PNorm = 184.6460, GNorm = 0.0592, lr_0 = 1.0709e-04
Loss = 3.4339e-03, PNorm = 184.6471, GNorm = 0.4848, lr_0 = 1.0701e-04
Loss = 2.1695e-03, PNorm = 184.6495, GNorm = 0.0730, lr_0 = 1.0694e-04
Loss = 1.1622e-03, PNorm = 184.6506, GNorm = 0.1954, lr_0 = 1.0687e-04
Loss = 1.4458e-03, PNorm = 184.6506, GNorm = 0.0440, lr_0 = 1.0679e-04
Loss = 2.8783e-03, PNorm = 184.6503, GNorm = 0.0535, lr_0 = 1.0672e-04
Loss = 4.7346e-03, PNorm = 184.6510, GNorm = 0.2002, lr_0 = 1.0665e-04
Loss = 1.6796e-03, PNorm = 184.6517, GNorm = 0.6749, lr_0 = 1.0657e-04
Loss = 1.0303e-03, PNorm = 184.6527, GNorm = 0.1530, lr_0 = 1.0650e-04
Loss = 1.9864e-03, PNorm = 184.6539, GNorm = 0.0500, lr_0 = 1.0643e-04
Loss = 6.8964e-03, PNorm = 184.6558, GNorm = 0.2577, lr_0 = 1.0635e-04
Loss = 1.2037e-03, PNorm = 184.6573, GNorm = 0.1056, lr_0 = 1.0628e-04
Loss = 1.5686e-03, PNorm = 184.6577, GNorm = 0.1083, lr_0 = 1.0621e-04
Loss = 3.5397e-03, PNorm = 184.6582, GNorm = 0.1069, lr_0 = 1.0614e-04
Loss = 3.1490e-03, PNorm = 184.6596, GNorm = 0.2044, lr_0 = 1.0606e-04
Loss = 1.1185e-02, PNorm = 184.6624, GNorm = 0.1929, lr_0 = 1.0599e-04
Loss = 4.2853e-03, PNorm = 184.6628, GNorm = 0.0579, lr_0 = 1.0592e-04
Loss = 9.4648e-03, PNorm = 184.6619, GNorm = 0.2070, lr_0 = 1.0585e-04
Loss = 4.3885e-03, PNorm = 184.6630, GNorm = 0.1064, lr_0 = 1.0577e-04
Loss = 1.2163e-03, PNorm = 184.6648, GNorm = 0.1109, lr_0 = 1.0570e-04
Loss = 1.9993e-03, PNorm = 184.6655, GNorm = 0.1229, lr_0 = 1.0563e-04
Loss = 1.1620e-03, PNorm = 184.6658, GNorm = 0.0562, lr_0 = 1.0556e-04
Loss = 1.6910e-03, PNorm = 184.6660, GNorm = 0.0628, lr_0 = 1.0548e-04
Loss = 1.1746e-03, PNorm = 184.6671, GNorm = 0.0621, lr_0 = 1.0541e-04
Loss = 1.0145e-03, PNorm = 184.6678, GNorm = 0.0878, lr_0 = 1.0534e-04
Loss = 1.4099e-03, PNorm = 184.6688, GNorm = 0.0392, lr_0 = 1.0527e-04
Loss = 2.4357e-03, PNorm = 184.6707, GNorm = 0.0641, lr_0 = 1.0519e-04
Loss = 2.5440e-03, PNorm = 184.6721, GNorm = 0.1005, lr_0 = 1.0512e-04
Loss = 2.3430e-03, PNorm = 184.6731, GNorm = 0.4411, lr_0 = 1.0505e-04
Loss = 6.8991e-03, PNorm = 184.6737, GNorm = 0.0702, lr_0 = 1.0498e-04
Loss = 4.0281e-03, PNorm = 184.6753, GNorm = 0.0488, lr_0 = 1.0491e-04
Loss = 1.3305e-03, PNorm = 184.6766, GNorm = 0.1480, lr_0 = 1.0483e-04
Loss = 2.1403e-03, PNorm = 184.6780, GNorm = 0.0460, lr_0 = 1.0476e-04
Loss = 2.6087e-03, PNorm = 184.6800, GNorm = 0.2424, lr_0 = 1.0469e-04
Loss = 1.6455e-03, PNorm = 184.6816, GNorm = 0.0796, lr_0 = 1.0462e-04
Loss = 1.4223e-03, PNorm = 184.6819, GNorm = 0.0707, lr_0 = 1.0455e-04
Loss = 2.9401e-03, PNorm = 184.6824, GNorm = 0.2916, lr_0 = 1.0448e-04
Loss = 2.8872e-03, PNorm = 184.6824, GNorm = 0.0606, lr_0 = 1.0440e-04
Loss = 1.7505e-03, PNorm = 184.6823, GNorm = 0.0964, lr_0 = 1.0433e-04
Loss = 1.0173e-02, PNorm = 184.6827, GNorm = 0.2055, lr_0 = 1.0426e-04
Loss = 6.3604e-03, PNorm = 184.6844, GNorm = 0.0670, lr_0 = 1.0419e-04
Loss = 5.7333e-03, PNorm = 184.6839, GNorm = 0.6678, lr_0 = 1.0412e-04
Loss = 3.8259e-03, PNorm = 184.6868, GNorm = 0.1500, lr_0 = 1.0405e-04
Loss = 1.9549e-03, PNorm = 184.6887, GNorm = 0.0789, lr_0 = 1.0398e-04
Loss = 2.3011e-03, PNorm = 184.6905, GNorm = 0.1016, lr_0 = 1.0391e-04
Loss = 5.7070e-03, PNorm = 184.6942, GNorm = 0.1044, lr_0 = 1.0383e-04
Loss = 1.5018e-03, PNorm = 184.6971, GNorm = 0.1658, lr_0 = 1.0376e-04
Loss = 1.3998e-03, PNorm = 184.6986, GNorm = 0.1011, lr_0 = 1.0369e-04
Loss = 2.0650e-03, PNorm = 184.7001, GNorm = 0.1413, lr_0 = 1.0362e-04
Loss = 1.2471e-03, PNorm = 184.7013, GNorm = 0.0871, lr_0 = 1.0355e-04
Loss = 9.3764e-04, PNorm = 184.7032, GNorm = 0.0418, lr_0 = 1.0348e-04
Loss = 4.3069e-03, PNorm = 184.7048, GNorm = 0.0479, lr_0 = 1.0341e-04
Loss = 3.0572e-03, PNorm = 184.7060, GNorm = 0.4929, lr_0 = 1.0334e-04
Loss = 2.6913e-03, PNorm = 184.7071, GNorm = 0.0497, lr_0 = 1.0327e-04
Loss = 3.3398e-03, PNorm = 184.7085, GNorm = 0.0816, lr_0 = 1.0320e-04
Loss = 1.7649e-03, PNorm = 184.7103, GNorm = 0.0491, lr_0 = 1.0312e-04
Loss = 1.7258e-03, PNorm = 184.7117, GNorm = 0.0521, lr_0 = 1.0305e-04
Loss = 7.3675e-04, PNorm = 184.7129, GNorm = 0.0659, lr_0 = 1.0298e-04
Loss = 1.7345e-03, PNorm = 184.7136, GNorm = 0.0914, lr_0 = 1.0291e-04
Loss = 1.8983e-03, PNorm = 184.7140, GNorm = 0.0921, lr_0 = 1.0284e-04
Loss = 2.1752e-03, PNorm = 184.7146, GNorm = 0.1122, lr_0 = 1.0277e-04
Loss = 1.9151e-03, PNorm = 184.7155, GNorm = 0.1391, lr_0 = 1.0270e-04
Loss = 1.4733e-03, PNorm = 184.7171, GNorm = 0.0533, lr_0 = 1.0263e-04
Loss = 2.4323e-03, PNorm = 184.7190, GNorm = 0.1422, lr_0 = 1.0256e-04
Loss = 2.6708e-03, PNorm = 184.7195, GNorm = 0.0405, lr_0 = 1.0249e-04
Loss = 2.9471e-03, PNorm = 184.7197, GNorm = 0.0632, lr_0 = 1.0242e-04
Loss = 1.0723e-03, PNorm = 184.7201, GNorm = 0.0330, lr_0 = 1.0235e-04
Loss = 3.6061e-03, PNorm = 184.7205, GNorm = 0.6402, lr_0 = 1.0228e-04
Loss = 1.1540e-03, PNorm = 184.7211, GNorm = 0.0651, lr_0 = 1.0221e-04
Loss = 3.7411e-03, PNorm = 184.7225, GNorm = 0.0545, lr_0 = 1.0214e-04
Loss = 8.4187e-04, PNorm = 184.7239, GNorm = 0.1053, lr_0 = 1.0207e-04
Loss = 1.7064e-03, PNorm = 184.7255, GNorm = 0.1100, lr_0 = 1.0200e-04
Loss = 1.9855e-03, PNorm = 184.7267, GNorm = 0.0574, lr_0 = 1.0193e-04
Loss = 1.0360e-03, PNorm = 184.7277, GNorm = 0.0935, lr_0 = 1.0186e-04
Loss = 3.4381e-03, PNorm = 184.7285, GNorm = 0.0499, lr_0 = 1.0179e-04
Loss = 1.6315e-03, PNorm = 184.7295, GNorm = 0.0544, lr_0 = 1.0172e-04
Loss = 2.5356e-03, PNorm = 184.7299, GNorm = 0.1972, lr_0 = 1.0165e-04
Loss = 1.6751e-03, PNorm = 184.7315, GNorm = 0.1508, lr_0 = 1.0158e-04
Loss = 3.5550e-03, PNorm = 184.7324, GNorm = 0.0533, lr_0 = 1.0151e-04
Loss = 3.5820e-03, PNorm = 184.7340, GNorm = 0.1145, lr_0 = 1.0144e-04
Loss = 5.8380e-03, PNorm = 184.7345, GNorm = 0.1371, lr_0 = 1.0137e-04
Loss = 1.7162e-03, PNorm = 184.7355, GNorm = 0.0757, lr_0 = 1.0130e-04
Loss = 4.5241e-03, PNorm = 184.7378, GNorm = 0.4617, lr_0 = 1.0123e-04
Loss = 2.5097e-03, PNorm = 184.7382, GNorm = 0.0676, lr_0 = 1.0116e-04
Loss = 2.1498e-03, PNorm = 184.7386, GNorm = 0.1359, lr_0 = 1.0110e-04
Loss = 2.9350e-03, PNorm = 184.7403, GNorm = 0.0384, lr_0 = 1.0103e-04
Loss = 1.4944e-03, PNorm = 184.7421, GNorm = 0.0721, lr_0 = 1.0096e-04
Loss = 3.0868e-03, PNorm = 184.7432, GNorm = 0.3031, lr_0 = 1.0089e-04
Loss = 1.2323e-03, PNorm = 184.7447, GNorm = 0.1170, lr_0 = 1.0082e-04
Loss = 1.9418e-03, PNorm = 184.7455, GNorm = 0.1350, lr_0 = 1.0075e-04
Loss = 2.6688e-03, PNorm = 184.7474, GNorm = 0.0913, lr_0 = 1.0068e-04
Loss = 2.7940e-03, PNorm = 184.7491, GNorm = 0.0851, lr_0 = 1.0061e-04
Loss = 1.6366e-03, PNorm = 184.7499, GNorm = 0.0568, lr_0 = 1.0054e-04
Loss = 2.0353e-03, PNorm = 184.7514, GNorm = 0.0366, lr_0 = 1.0047e-04
Loss = 1.3410e-03, PNorm = 184.7524, GNorm = 0.0850, lr_0 = 1.0041e-04
Loss = 8.4094e-03, PNorm = 184.7526, GNorm = 0.2310, lr_0 = 1.0034e-04
Loss = 5.8895e-03, PNorm = 184.7536, GNorm = 0.1254, lr_0 = 1.0027e-04
Loss = 3.9354e-03, PNorm = 184.7551, GNorm = 0.0835, lr_0 = 1.0020e-04
Loss = 3.7199e-03, PNorm = 184.7564, GNorm = 0.1351, lr_0 = 1.0013e-04
Loss = 3.4500e-03, PNorm = 184.7569, GNorm = 0.2121, lr_0 = 1.0006e-04
Loss = 2.5027e-03, PNorm = 184.7562, GNorm = 0.1069, lr_0 = 1.0000e-04
Validation mae = 0.120569
Model 0 best validation mae = 0.120490 on epoch 24
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.119689
Ensemble test mae = 0.119689
Fold 6
Splitting data with seed 6
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN()
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=2048, out_features=2100, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=2100, out_features=2100, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.0, inplace=False)
    (7): Linear(in_features=2100, out_features=1, bias=True)
  )
)
Number of parameters = 8,717,101
Moving model to cuda
Epoch 0
Loss = 9.2988e-01, PNorm = 64.6285, GNorm = 1.8417, lr_0 = 1.0413e-04
Loss = 9.2131e-01, PNorm = 64.6390, GNorm = 3.6301, lr_0 = 1.0788e-04
Loss = 9.3551e-01, PNorm = 64.6483, GNorm = 2.6055, lr_0 = 1.1163e-04
Loss = 6.6846e-01, PNorm = 64.6577, GNorm = 3.7288, lr_0 = 1.1537e-04
Loss = 7.2597e-01, PNorm = 64.6658, GNorm = 2.0700, lr_0 = 1.1913e-04
Loss = 6.8824e-01, PNorm = 64.6741, GNorm = 2.8407, lr_0 = 1.2287e-04
Loss = 6.9997e-01, PNorm = 64.6837, GNorm = 2.3657, lr_0 = 1.2663e-04
Loss = 6.8231e-01, PNorm = 64.6929, GNorm = 3.0198, lr_0 = 1.3038e-04
Loss = 7.0999e-01, PNorm = 64.7031, GNorm = 1.8031, lr_0 = 1.3413e-04
Loss = 5.7968e-01, PNorm = 64.7134, GNorm = 3.6761, lr_0 = 1.3788e-04
Loss = 6.1934e-01, PNorm = 64.7245, GNorm = 1.7341, lr_0 = 1.4163e-04
Loss = 6.5142e-01, PNorm = 64.7360, GNorm = 2.2619, lr_0 = 1.4537e-04
Loss = 6.0270e-01, PNorm = 64.7453, GNorm = 2.1713, lr_0 = 1.4913e-04
Loss = 6.5475e-01, PNorm = 64.7554, GNorm = 3.1135, lr_0 = 1.5288e-04
Loss = 6.5393e-01, PNorm = 64.7679, GNorm = 2.3722, lr_0 = 1.5662e-04
Loss = 6.3343e-01, PNorm = 64.7808, GNorm = 2.0437, lr_0 = 1.6038e-04
Loss = 5.9660e-01, PNorm = 64.7928, GNorm = 2.0231, lr_0 = 1.6412e-04
Loss = 5.4307e-01, PNorm = 64.8049, GNorm = 2.8288, lr_0 = 1.6788e-04
Loss = 6.0586e-01, PNorm = 64.8165, GNorm = 2.0449, lr_0 = 1.7163e-04
Loss = 6.7458e-01, PNorm = 64.8327, GNorm = 3.1416, lr_0 = 1.7538e-04
Loss = 5.7888e-01, PNorm = 64.8498, GNorm = 2.4766, lr_0 = 1.7913e-04
Loss = 6.2535e-01, PNorm = 64.8647, GNorm = 3.1771, lr_0 = 1.8288e-04
Loss = 5.2900e-01, PNorm = 64.8769, GNorm = 1.8502, lr_0 = 1.8662e-04
Loss = 6.0597e-01, PNorm = 64.8922, GNorm = 2.0508, lr_0 = 1.9038e-04
Loss = 5.9092e-01, PNorm = 64.9077, GNorm = 2.4963, lr_0 = 1.9413e-04
Loss = 5.4704e-01, PNorm = 64.9236, GNorm = 1.5259, lr_0 = 1.9788e-04
Loss = 6.5896e-01, PNorm = 64.9394, GNorm = 1.8718, lr_0 = 2.0163e-04
Loss = 6.4626e-01, PNorm = 64.9578, GNorm = 2.3235, lr_0 = 2.0537e-04
Loss = 6.2607e-01, PNorm = 64.9755, GNorm = 1.9931, lr_0 = 2.0913e-04
Loss = 5.1012e-01, PNorm = 64.9943, GNorm = 2.3328, lr_0 = 2.1288e-04
Loss = 5.5807e-01, PNorm = 65.0139, GNorm = 2.8508, lr_0 = 2.1663e-04
Loss = 6.1095e-01, PNorm = 65.0333, GNorm = 2.2938, lr_0 = 2.2038e-04
Loss = 5.3797e-01, PNorm = 65.0548, GNorm = 1.6914, lr_0 = 2.2412e-04
Loss = 5.7673e-01, PNorm = 65.0758, GNorm = 2.1403, lr_0 = 2.2787e-04
Loss = 5.0801e-01, PNorm = 65.0974, GNorm = 1.9395, lr_0 = 2.3163e-04
Loss = 5.2731e-01, PNorm = 65.1170, GNorm = 2.7154, lr_0 = 2.3538e-04
Loss = 6.2457e-01, PNorm = 65.1372, GNorm = 2.1408, lr_0 = 2.3913e-04
Loss = 5.7154e-01, PNorm = 65.1593, GNorm = 1.7624, lr_0 = 2.4288e-04
Loss = 5.3422e-01, PNorm = 65.1785, GNorm = 1.9782, lr_0 = 2.4662e-04
Loss = 5.7901e-01, PNorm = 65.2016, GNorm = 1.7471, lr_0 = 2.5038e-04
Loss = 5.9995e-01, PNorm = 65.2253, GNorm = 2.0591, lr_0 = 2.5413e-04
Loss = 4.7788e-01, PNorm = 65.2479, GNorm = 1.4994, lr_0 = 2.5788e-04
Loss = 5.3207e-01, PNorm = 65.2694, GNorm = 2.7298, lr_0 = 2.6163e-04
Loss = 5.2319e-01, PNorm = 65.2951, GNorm = 2.0135, lr_0 = 2.6537e-04
Loss = 5.5811e-01, PNorm = 65.3203, GNorm = 1.8771, lr_0 = 2.6912e-04
Loss = 4.2557e-01, PNorm = 65.3460, GNorm = 1.9383, lr_0 = 2.7288e-04
Loss = 4.9386e-01, PNorm = 65.3662, GNorm = 1.2963, lr_0 = 2.7663e-04
Loss = 5.4946e-01, PNorm = 65.3926, GNorm = 1.8018, lr_0 = 2.8038e-04
Loss = 5.2529e-01, PNorm = 65.4186, GNorm = 1.6840, lr_0 = 2.8413e-04
Loss = 5.2493e-01, PNorm = 65.4406, GNorm = 1.5849, lr_0 = 2.8787e-04
Loss = 6.0513e-01, PNorm = 65.4687, GNorm = 1.6700, lr_0 = 2.9163e-04
Loss = 6.4850e-01, PNorm = 65.4971, GNorm = 1.7332, lr_0 = 2.9538e-04
Loss = 5.2822e-01, PNorm = 65.5296, GNorm = 1.8480, lr_0 = 2.9913e-04
Loss = 5.2876e-01, PNorm = 65.5584, GNorm = 1.4858, lr_0 = 3.0288e-04
Loss = 5.2760e-01, PNorm = 65.5894, GNorm = 2.0515, lr_0 = 3.0662e-04
Loss = 5.9289e-01, PNorm = 65.6216, GNorm = 1.7500, lr_0 = 3.1037e-04
Loss = 5.5236e-01, PNorm = 65.6488, GNorm = 1.3554, lr_0 = 3.1413e-04
Loss = 5.4101e-01, PNorm = 65.6800, GNorm = 1.5028, lr_0 = 3.1788e-04
Loss = 5.5983e-01, PNorm = 65.7136, GNorm = 1.6009, lr_0 = 3.2163e-04
Loss = 6.6322e-01, PNorm = 65.7474, GNorm = 1.4141, lr_0 = 3.2538e-04
Loss = 5.1784e-01, PNorm = 65.7840, GNorm = 1.8476, lr_0 = 3.2912e-04
Loss = 5.3501e-01, PNorm = 65.8162, GNorm = 1.3808, lr_0 = 3.3288e-04
Loss = 5.8959e-01, PNorm = 65.8531, GNorm = 1.3138, lr_0 = 3.3663e-04
Loss = 4.9458e-01, PNorm = 65.8837, GNorm = 1.4718, lr_0 = 3.4038e-04
Loss = 4.5323e-01, PNorm = 65.9182, GNorm = 1.3578, lr_0 = 3.4413e-04
Loss = 6.0853e-01, PNorm = 65.9508, GNorm = 1.5628, lr_0 = 3.4787e-04
Loss = 4.8579e-01, PNorm = 65.9828, GNorm = 1.2024, lr_0 = 3.5162e-04
Loss = 5.1702e-01, PNorm = 66.0208, GNorm = 1.6689, lr_0 = 3.5538e-04
Loss = 5.0371e-01, PNorm = 66.0570, GNorm = 1.4227, lr_0 = 3.5913e-04
Loss = 5.8128e-01, PNorm = 66.0947, GNorm = 1.3640, lr_0 = 3.6288e-04
Loss = 5.5714e-01, PNorm = 66.1388, GNorm = 1.4906, lr_0 = 3.6662e-04
Loss = 5.3320e-01, PNorm = 66.1831, GNorm = 1.1307, lr_0 = 3.7037e-04
Loss = 4.7509e-01, PNorm = 66.2234, GNorm = 1.7760, lr_0 = 3.7413e-04
Loss = 4.8371e-01, PNorm = 66.2582, GNorm = 1.5609, lr_0 = 3.7788e-04
Loss = 5.0358e-01, PNorm = 66.2981, GNorm = 1.1400, lr_0 = 3.8163e-04
Loss = 4.8103e-01, PNorm = 66.3407, GNorm = 1.4310, lr_0 = 3.8537e-04
Loss = 5.0414e-01, PNorm = 66.3748, GNorm = 1.3863, lr_0 = 3.8912e-04
Loss = 5.0782e-01, PNorm = 66.4149, GNorm = 1.7681, lr_0 = 3.9287e-04
Loss = 4.5368e-01, PNorm = 66.4556, GNorm = 1.4316, lr_0 = 3.9663e-04
Loss = 5.7766e-01, PNorm = 66.4913, GNorm = 2.1500, lr_0 = 4.0038e-04
Loss = 5.1367e-01, PNorm = 66.5331, GNorm = 1.5318, lr_0 = 4.0413e-04
Loss = 4.2477e-01, PNorm = 66.5793, GNorm = 1.4578, lr_0 = 4.0787e-04
Loss = 4.7208e-01, PNorm = 66.6220, GNorm = 1.1374, lr_0 = 4.1162e-04
Loss = 5.6107e-01, PNorm = 66.6656, GNorm = 1.4610, lr_0 = 4.1537e-04
Loss = 5.4057e-01, PNorm = 66.7077, GNorm = 1.4125, lr_0 = 4.1913e-04
Loss = 5.3804e-01, PNorm = 66.7562, GNorm = 1.2575, lr_0 = 4.2288e-04
Loss = 4.6212e-01, PNorm = 66.8048, GNorm = 1.6196, lr_0 = 4.2662e-04
Loss = 5.4301e-01, PNorm = 66.8515, GNorm = 1.9298, lr_0 = 4.3037e-04
Loss = 4.9370e-01, PNorm = 66.9018, GNorm = 1.3932, lr_0 = 4.3412e-04
Loss = 5.0744e-01, PNorm = 66.9521, GNorm = 2.1136, lr_0 = 4.3788e-04
Loss = 5.9623e-01, PNorm = 66.9979, GNorm = 1.8200, lr_0 = 4.4163e-04
Loss = 5.3196e-01, PNorm = 67.0522, GNorm = 1.4911, lr_0 = 4.4538e-04
Loss = 5.5111e-01, PNorm = 67.1066, GNorm = 1.1905, lr_0 = 4.4912e-04
Loss = 4.3687e-01, PNorm = 67.1573, GNorm = 1.1638, lr_0 = 4.5287e-04
Loss = 5.0824e-01, PNorm = 67.2085, GNorm = 1.2822, lr_0 = 4.5662e-04
Loss = 5.3321e-01, PNorm = 67.2636, GNorm = 1.1813, lr_0 = 4.6038e-04
Loss = 4.9044e-01, PNorm = 67.3161, GNorm = 1.3032, lr_0 = 4.6413e-04
Loss = 4.9825e-01, PNorm = 67.3674, GNorm = 1.2606, lr_0 = 4.6787e-04
Loss = 4.9285e-01, PNorm = 67.4243, GNorm = 1.3529, lr_0 = 4.7162e-04
Loss = 4.8187e-01, PNorm = 67.4752, GNorm = 1.1651, lr_0 = 4.7537e-04
Loss = 4.6973e-01, PNorm = 67.5371, GNorm = 1.5775, lr_0 = 4.7913e-04
Loss = 5.6619e-01, PNorm = 67.5946, GNorm = 1.4673, lr_0 = 4.8288e-04
Loss = 5.2242e-01, PNorm = 67.6613, GNorm = 1.3904, lr_0 = 4.8663e-04
Loss = 5.5785e-01, PNorm = 67.7213, GNorm = 1.3858, lr_0 = 4.9038e-04
Loss = 5.1022e-01, PNorm = 67.7826, GNorm = 1.0393, lr_0 = 4.9412e-04
Loss = 4.8363e-01, PNorm = 67.8418, GNorm = 1.4265, lr_0 = 4.9788e-04
Loss = 5.3168e-01, PNorm = 67.8982, GNorm = 1.9295, lr_0 = 5.0163e-04
Loss = 5.3564e-01, PNorm = 67.9602, GNorm = 2.0927, lr_0 = 5.0538e-04
Loss = 5.2531e-01, PNorm = 68.0244, GNorm = 1.2670, lr_0 = 5.0913e-04
Loss = 5.2467e-01, PNorm = 68.0874, GNorm = 1.9484, lr_0 = 5.1287e-04
Loss = 5.3348e-01, PNorm = 68.1474, GNorm = 1.1403, lr_0 = 5.1663e-04
Loss = 4.9477e-01, PNorm = 68.2088, GNorm = 1.3096, lr_0 = 5.2038e-04
Loss = 4.9448e-01, PNorm = 68.2621, GNorm = 1.0159, lr_0 = 5.2413e-04
Loss = 4.8161e-01, PNorm = 68.3229, GNorm = 1.2366, lr_0 = 5.2788e-04
Loss = 5.0820e-01, PNorm = 68.3887, GNorm = 1.5022, lr_0 = 5.3162e-04
Loss = 5.4191e-01, PNorm = 68.4480, GNorm = 1.4718, lr_0 = 5.3538e-04
Loss = 4.8195e-01, PNorm = 68.5127, GNorm = 1.1563, lr_0 = 5.3912e-04
Loss = 4.8463e-01, PNorm = 68.5812, GNorm = 2.0007, lr_0 = 5.4288e-04
Loss = 4.5067e-01, PNorm = 68.6422, GNorm = 1.6523, lr_0 = 5.4663e-04
Loss = 4.8130e-01, PNorm = 68.7064, GNorm = 1.0691, lr_0 = 5.5038e-04
Validation mae = 0.129589
Epoch 1
Loss = 3.9440e-01, PNorm = 68.7711, GNorm = 1.1476, lr_0 = 5.5413e-04
Loss = 4.4439e-01, PNorm = 68.8430, GNorm = 1.5830, lr_0 = 5.5787e-04
Loss = 3.5806e-01, PNorm = 68.9189, GNorm = 1.5055, lr_0 = 5.6163e-04
Loss = 4.0447e-01, PNorm = 68.9993, GNorm = 1.6115, lr_0 = 5.6538e-04
Loss = 3.9343e-01, PNorm = 69.0880, GNorm = 1.4524, lr_0 = 5.6913e-04
Loss = 3.7107e-01, PNorm = 69.1780, GNorm = 0.9325, lr_0 = 5.7288e-04
Loss = 3.5481e-01, PNorm = 69.2763, GNorm = 1.3046, lr_0 = 5.7662e-04
Loss = 4.0079e-01, PNorm = 69.3680, GNorm = 1.1174, lr_0 = 5.8038e-04
Loss = 3.9522e-01, PNorm = 69.4627, GNorm = 1.0521, lr_0 = 5.8413e-04
Loss = 4.3080e-01, PNorm = 69.5616, GNorm = 1.1056, lr_0 = 5.8788e-04
Loss = 4.1599e-01, PNorm = 69.6582, GNorm = 1.2502, lr_0 = 5.9163e-04
Loss = 3.9157e-01, PNorm = 69.7457, GNorm = 1.1331, lr_0 = 5.9538e-04
Loss = 3.7869e-01, PNorm = 69.8378, GNorm = 1.6002, lr_0 = 5.9913e-04
Loss = 3.7584e-01, PNorm = 69.9249, GNorm = 1.4167, lr_0 = 6.0288e-04
Loss = 3.7616e-01, PNorm = 70.0146, GNorm = 1.1894, lr_0 = 6.0663e-04
Loss = 4.0390e-01, PNorm = 70.1108, GNorm = 1.9581, lr_0 = 6.1038e-04
Loss = 3.7573e-01, PNorm = 70.2029, GNorm = 0.9183, lr_0 = 6.1413e-04
Loss = 4.1194e-01, PNorm = 70.3169, GNorm = 1.7180, lr_0 = 6.1788e-04
Loss = 3.7410e-01, PNorm = 70.4215, GNorm = 1.2699, lr_0 = 6.2163e-04
Loss = 4.2598e-01, PNorm = 70.5408, GNorm = 1.9884, lr_0 = 6.2538e-04
Loss = 4.2269e-01, PNorm = 70.6551, GNorm = 1.4698, lr_0 = 6.2913e-04
Loss = 4.3910e-01, PNorm = 70.7951, GNorm = 1.3156, lr_0 = 6.3288e-04
Loss = 3.9665e-01, PNorm = 70.9130, GNorm = 0.9226, lr_0 = 6.3663e-04
Loss = 3.7359e-01, PNorm = 71.0378, GNorm = 1.4384, lr_0 = 6.4038e-04
Loss = 4.4937e-01, PNorm = 71.1561, GNorm = 1.6636, lr_0 = 6.4413e-04
Loss = 4.6215e-01, PNorm = 71.2766, GNorm = 1.0799, lr_0 = 6.4788e-04
Loss = 4.2593e-01, PNorm = 71.4047, GNorm = 0.7764, lr_0 = 6.5163e-04
Loss = 3.9690e-01, PNorm = 71.5198, GNorm = 1.6866, lr_0 = 6.5538e-04
Loss = 3.9707e-01, PNorm = 71.6280, GNorm = 0.9299, lr_0 = 6.5913e-04
Loss = 4.1296e-01, PNorm = 71.7491, GNorm = 1.2206, lr_0 = 6.6288e-04
Loss = 3.7507e-01, PNorm = 71.8650, GNorm = 1.0590, lr_0 = 6.6663e-04
Loss = 3.9227e-01, PNorm = 71.9788, GNorm = 1.1808, lr_0 = 6.7038e-04
Loss = 3.9877e-01, PNorm = 72.0949, GNorm = 1.4168, lr_0 = 6.7413e-04
Loss = 4.3543e-01, PNorm = 72.2096, GNorm = 1.2212, lr_0 = 6.7788e-04
Loss = 4.1587e-01, PNorm = 72.3313, GNorm = 1.2752, lr_0 = 6.8163e-04
Loss = 4.1521e-01, PNorm = 72.4540, GNorm = 1.5213, lr_0 = 6.8538e-04
Loss = 5.2767e-01, PNorm = 72.5824, GNorm = 1.1516, lr_0 = 6.8913e-04
Loss = 4.6292e-01, PNorm = 72.7278, GNorm = 1.8414, lr_0 = 6.9288e-04
Loss = 4.4022e-01, PNorm = 72.8610, GNorm = 1.3148, lr_0 = 6.9663e-04
Loss = 4.3491e-01, PNorm = 72.9993, GNorm = 1.3344, lr_0 = 7.0038e-04
Loss = 3.9395e-01, PNorm = 73.1189, GNorm = 1.4053, lr_0 = 7.0413e-04
Loss = 3.9218e-01, PNorm = 73.2463, GNorm = 1.0184, lr_0 = 7.0788e-04
Loss = 4.1568e-01, PNorm = 73.3534, GNorm = 2.1170, lr_0 = 7.1163e-04
Loss = 3.6112e-01, PNorm = 73.4595, GNorm = 1.3871, lr_0 = 7.1538e-04
Loss = 4.1625e-01, PNorm = 73.5807, GNorm = 1.5305, lr_0 = 7.1913e-04
Loss = 3.6262e-01, PNorm = 73.6854, GNorm = 0.9461, lr_0 = 7.2288e-04
Loss = 3.4391e-01, PNorm = 73.7938, GNorm = 1.0929, lr_0 = 7.2663e-04
Loss = 3.7692e-01, PNorm = 73.8924, GNorm = 1.1079, lr_0 = 7.3038e-04
Loss = 4.6803e-01, PNorm = 74.0114, GNorm = 1.2186, lr_0 = 7.3413e-04
Loss = 3.9487e-01, PNorm = 74.1368, GNorm = 1.2693, lr_0 = 7.3788e-04
Loss = 4.4765e-01, PNorm = 74.2662, GNorm = 1.4936, lr_0 = 7.4163e-04
Loss = 4.4406e-01, PNorm = 74.4043, GNorm = 1.4760, lr_0 = 7.4538e-04
Loss = 5.0475e-01, PNorm = 74.5413, GNorm = 1.1759, lr_0 = 7.4913e-04
Loss = 3.5498e-01, PNorm = 74.6676, GNorm = 1.2332, lr_0 = 7.5288e-04
Loss = 4.0077e-01, PNorm = 74.7947, GNorm = 1.7318, lr_0 = 7.5663e-04
Loss = 3.7272e-01, PNorm = 74.9062, GNorm = 0.9421, lr_0 = 7.6038e-04
Loss = 4.1236e-01, PNorm = 75.0374, GNorm = 1.5866, lr_0 = 7.6413e-04
Loss = 4.1248e-01, PNorm = 75.1649, GNorm = 1.0128, lr_0 = 7.6788e-04
Loss = 3.7644e-01, PNorm = 75.3025, GNorm = 0.8961, lr_0 = 7.7163e-04
Loss = 4.2350e-01, PNorm = 75.4316, GNorm = 1.2118, lr_0 = 7.7538e-04
Loss = 4.3344e-01, PNorm = 75.5586, GNorm = 1.1138, lr_0 = 7.7913e-04
Loss = 4.5127e-01, PNorm = 75.7074, GNorm = 1.5191, lr_0 = 7.8288e-04
Loss = 4.4870e-01, PNorm = 75.8529, GNorm = 1.1946, lr_0 = 7.8663e-04
Loss = 4.2490e-01, PNorm = 76.0000, GNorm = 1.2857, lr_0 = 7.9038e-04
Loss = 4.4461e-01, PNorm = 76.1522, GNorm = 1.3564, lr_0 = 7.9413e-04
Loss = 4.2637e-01, PNorm = 76.2909, GNorm = 0.8571, lr_0 = 7.9788e-04
Loss = 4.1130e-01, PNorm = 76.4320, GNorm = 0.9394, lr_0 = 8.0163e-04
Loss = 4.1236e-01, PNorm = 76.5603, GNorm = 1.3244, lr_0 = 8.0538e-04
Loss = 4.0175e-01, PNorm = 76.7067, GNorm = 2.4213, lr_0 = 8.0913e-04
Loss = 4.7034e-01, PNorm = 76.8672, GNorm = 1.5503, lr_0 = 8.1288e-04
Loss = 4.2764e-01, PNorm = 77.0370, GNorm = 1.0842, lr_0 = 8.1663e-04
Loss = 4.3281e-01, PNorm = 77.1974, GNorm = 1.1099, lr_0 = 8.2038e-04
Loss = 4.4210e-01, PNorm = 77.3460, GNorm = 0.8934, lr_0 = 8.2413e-04
Loss = 4.3940e-01, PNorm = 77.4896, GNorm = 1.6396, lr_0 = 8.2788e-04
Loss = 4.1544e-01, PNorm = 77.6548, GNorm = 1.3268, lr_0 = 8.3163e-04
Loss = 4.2072e-01, PNorm = 77.8151, GNorm = 0.9503, lr_0 = 8.3538e-04
Loss = 3.9821e-01, PNorm = 77.9766, GNorm = 1.0522, lr_0 = 8.3913e-04
Loss = 3.7207e-01, PNorm = 78.1263, GNorm = 0.7794, lr_0 = 8.4288e-04
Loss = 5.1886e-01, PNorm = 78.2690, GNorm = 1.4478, lr_0 = 8.4663e-04
Loss = 4.6755e-01, PNorm = 78.4271, GNorm = 1.1261, lr_0 = 8.5038e-04
Loss = 3.9461e-01, PNorm = 78.5929, GNorm = 1.9566, lr_0 = 8.5413e-04
Loss = 4.6763e-01, PNorm = 78.7526, GNorm = 1.4015, lr_0 = 8.5788e-04
Loss = 4.7682e-01, PNorm = 78.9129, GNorm = 1.0893, lr_0 = 8.6163e-04
Loss = 4.1173e-01, PNorm = 79.0791, GNorm = 1.2460, lr_0 = 8.6538e-04
Loss = 4.0810e-01, PNorm = 79.2225, GNorm = 1.3051, lr_0 = 8.6913e-04
Loss = 4.7579e-01, PNorm = 79.3641, GNorm = 0.9105, lr_0 = 8.7288e-04
Loss = 4.3370e-01, PNorm = 79.5091, GNorm = 1.2912, lr_0 = 8.7663e-04
Loss = 4.5327e-01, PNorm = 79.6534, GNorm = 0.9188, lr_0 = 8.8038e-04
Loss = 4.3594e-01, PNorm = 79.7959, GNorm = 1.3598, lr_0 = 8.8413e-04
Loss = 4.1864e-01, PNorm = 79.9463, GNorm = 1.0234, lr_0 = 8.8788e-04
Loss = 4.0532e-01, PNorm = 80.0879, GNorm = 1.1293, lr_0 = 8.9163e-04
Loss = 4.2750e-01, PNorm = 80.2266, GNorm = 1.4944, lr_0 = 8.9538e-04
Loss = 4.3890e-01, PNorm = 80.3739, GNorm = 1.4131, lr_0 = 8.9913e-04
Loss = 3.7020e-01, PNorm = 80.5136, GNorm = 1.3786, lr_0 = 9.0288e-04
Loss = 4.8716e-01, PNorm = 80.6579, GNorm = 1.1922, lr_0 = 9.0663e-04
Loss = 4.3045e-01, PNorm = 80.8052, GNorm = 0.8963, lr_0 = 9.1038e-04
Loss = 4.3229e-01, PNorm = 80.9601, GNorm = 0.8407, lr_0 = 9.1413e-04
Loss = 4.9678e-01, PNorm = 81.1128, GNorm = 1.1302, lr_0 = 9.1788e-04
Loss = 4.0058e-01, PNorm = 81.2704, GNorm = 1.0916, lr_0 = 9.2163e-04
Loss = 4.2226e-01, PNorm = 81.4312, GNorm = 0.9495, lr_0 = 9.2538e-04
Loss = 4.3306e-01, PNorm = 81.5755, GNorm = 1.3260, lr_0 = 9.2913e-04
Loss = 4.2435e-01, PNorm = 81.7276, GNorm = 1.1585, lr_0 = 9.3288e-04
Loss = 4.3530e-01, PNorm = 81.8767, GNorm = 1.0297, lr_0 = 9.3663e-04
Loss = 4.2850e-01, PNorm = 82.0158, GNorm = 1.0266, lr_0 = 9.4038e-04
Loss = 4.6894e-01, PNorm = 82.1627, GNorm = 1.4066, lr_0 = 9.4413e-04
Loss = 4.1443e-01, PNorm = 82.3061, GNorm = 0.7702, lr_0 = 9.4788e-04
Loss = 3.8599e-01, PNorm = 82.4527, GNorm = 0.9515, lr_0 = 9.5163e-04
Loss = 3.9181e-01, PNorm = 82.6008, GNorm = 0.8891, lr_0 = 9.5538e-04
Loss = 4.2340e-01, PNorm = 82.7530, GNorm = 0.9349, lr_0 = 9.5913e-04
Loss = 4.5092e-01, PNorm = 82.9202, GNorm = 1.8205, lr_0 = 9.6288e-04
Loss = 4.4120e-01, PNorm = 83.0931, GNorm = 0.8232, lr_0 = 9.6663e-04
Loss = 4.0848e-01, PNorm = 83.2620, GNorm = 1.0199, lr_0 = 9.7038e-04
Loss = 4.1768e-01, PNorm = 83.4309, GNorm = 1.0652, lr_0 = 9.7413e-04
Loss = 4.7975e-01, PNorm = 83.5885, GNorm = 1.2196, lr_0 = 9.7788e-04
Loss = 4.4178e-01, PNorm = 83.7589, GNorm = 1.0284, lr_0 = 9.8163e-04
Loss = 4.7294e-01, PNorm = 83.9347, GNorm = 0.8157, lr_0 = 9.8537e-04
Loss = 3.9455e-01, PNorm = 84.0941, GNorm = 1.0145, lr_0 = 9.8912e-04
Loss = 4.2908e-01, PNorm = 84.2475, GNorm = 0.9062, lr_0 = 9.9288e-04
Loss = 4.4100e-01, PNorm = 84.4100, GNorm = 1.2515, lr_0 = 9.9663e-04
Loss = 4.9351e-01, PNorm = 84.5823, GNorm = 0.9926, lr_0 = 9.9993e-04
Validation mae = 0.128120
Epoch 2
Loss = 3.3899e-01, PNorm = 84.7684, GNorm = 1.6280, lr_0 = 9.9925e-04
Loss = 3.0246e-01, PNorm = 84.9278, GNorm = 1.0505, lr_0 = 9.9856e-04
Loss = 2.9137e-01, PNorm = 85.0839, GNorm = 0.9357, lr_0 = 9.9788e-04
Loss = 2.7324e-01, PNorm = 85.2496, GNorm = 0.6809, lr_0 = 9.9719e-04
Loss = 2.5102e-01, PNorm = 85.3964, GNorm = 0.7758, lr_0 = 9.9651e-04
Loss = 3.0247e-01, PNorm = 85.5467, GNorm = 1.2382, lr_0 = 9.9583e-04
Loss = 2.5099e-01, PNorm = 85.6878, GNorm = 0.6529, lr_0 = 9.9515e-04
Loss = 2.7594e-01, PNorm = 85.8359, GNorm = 0.8647, lr_0 = 9.9446e-04
Loss = 2.6939e-01, PNorm = 85.9830, GNorm = 0.8512, lr_0 = 9.9378e-04
Loss = 3.0357e-01, PNorm = 86.1611, GNorm = 0.7802, lr_0 = 9.9310e-04
Loss = 2.4313e-01, PNorm = 86.3374, GNorm = 0.7873, lr_0 = 9.9242e-04
Loss = 2.7325e-01, PNorm = 86.5083, GNorm = 0.8696, lr_0 = 9.9174e-04
Loss = 2.2794e-01, PNorm = 86.6670, GNorm = 0.9068, lr_0 = 9.9106e-04
Loss = 3.1719e-01, PNorm = 86.8316, GNorm = 1.5747, lr_0 = 9.9038e-04
Loss = 3.8816e-01, PNorm = 87.0230, GNorm = 1.2988, lr_0 = 9.8971e-04
Loss = 2.8091e-01, PNorm = 87.2467, GNorm = 1.2607, lr_0 = 9.8903e-04
Loss = 3.1210e-01, PNorm = 87.4518, GNorm = 0.9828, lr_0 = 9.8835e-04
Loss = 2.6706e-01, PNorm = 87.6584, GNorm = 1.2847, lr_0 = 9.8767e-04
Loss = 2.9506e-01, PNorm = 87.8415, GNorm = 1.0923, lr_0 = 9.8700e-04
Loss = 2.8326e-01, PNorm = 88.0225, GNorm = 1.5240, lr_0 = 9.8632e-04
Loss = 2.8195e-01, PNorm = 88.1985, GNorm = 0.8917, lr_0 = 9.8564e-04
Loss = 2.4401e-01, PNorm = 88.3688, GNorm = 1.2697, lr_0 = 9.8497e-04
Loss = 2.8475e-01, PNorm = 88.5304, GNorm = 0.8801, lr_0 = 9.8429e-04
Loss = 2.9652e-01, PNorm = 88.6986, GNorm = 0.7774, lr_0 = 9.8362e-04
Loss = 2.7109e-01, PNorm = 88.8793, GNorm = 1.4360, lr_0 = 9.8295e-04
Loss = 3.3064e-01, PNorm = 89.0651, GNorm = 0.8532, lr_0 = 9.8227e-04
Loss = 3.0666e-01, PNorm = 89.2486, GNorm = 0.9984, lr_0 = 9.8160e-04
Loss = 3.1294e-01, PNorm = 89.4326, GNorm = 1.4409, lr_0 = 9.8093e-04
Loss = 2.9779e-01, PNorm = 89.6081, GNorm = 0.7982, lr_0 = 9.8026e-04
Loss = 3.0819e-01, PNorm = 89.7937, GNorm = 1.3402, lr_0 = 9.7958e-04
Loss = 3.0652e-01, PNorm = 89.9581, GNorm = 1.1609, lr_0 = 9.7891e-04
Loss = 3.1080e-01, PNorm = 90.1384, GNorm = 0.8023, lr_0 = 9.7824e-04
Loss = 2.9123e-01, PNorm = 90.3119, GNorm = 1.2198, lr_0 = 9.7757e-04
Loss = 3.1846e-01, PNorm = 90.4820, GNorm = 1.4986, lr_0 = 9.7690e-04
Loss = 2.8535e-01, PNorm = 90.6615, GNorm = 1.0706, lr_0 = 9.7623e-04
Loss = 3.2528e-01, PNorm = 90.8084, GNorm = 1.0953, lr_0 = 9.7556e-04
Loss = 3.5821e-01, PNorm = 90.9732, GNorm = 1.2839, lr_0 = 9.7490e-04
Loss = 3.0126e-01, PNorm = 91.1261, GNorm = 1.0472, lr_0 = 9.7423e-04
Loss = 2.7642e-01, PNorm = 91.2701, GNorm = 0.8530, lr_0 = 9.7356e-04
Loss = 3.1040e-01, PNorm = 91.4191, GNorm = 0.9495, lr_0 = 9.7289e-04
Loss = 2.9310e-01, PNorm = 91.5774, GNorm = 1.4875, lr_0 = 9.7223e-04
Loss = 3.0438e-01, PNorm = 91.7327, GNorm = 1.2392, lr_0 = 9.7156e-04
Loss = 3.2867e-01, PNorm = 91.9011, GNorm = 0.9618, lr_0 = 9.7090e-04
Loss = 3.1836e-01, PNorm = 92.0668, GNorm = 0.9384, lr_0 = 9.7023e-04
Loss = 3.1385e-01, PNorm = 92.2436, GNorm = 1.1069, lr_0 = 9.6957e-04
Loss = 3.1787e-01, PNorm = 92.4181, GNorm = 0.8374, lr_0 = 9.6890e-04
Loss = 3.5002e-01, PNorm = 92.6005, GNorm = 1.1040, lr_0 = 9.6824e-04
Loss = 3.1415e-01, PNorm = 92.7872, GNorm = 0.8068, lr_0 = 9.6757e-04
Loss = 3.1494e-01, PNorm = 92.9660, GNorm = 1.4259, lr_0 = 9.6691e-04
Loss = 3.2000e-01, PNorm = 93.1486, GNorm = 0.8628, lr_0 = 9.6625e-04
Loss = 3.2754e-01, PNorm = 93.3050, GNorm = 0.9203, lr_0 = 9.6559e-04
Loss = 3.0452e-01, PNorm = 93.4636, GNorm = 0.9089, lr_0 = 9.6493e-04
Loss = 2.9822e-01, PNorm = 93.6303, GNorm = 1.2554, lr_0 = 9.6427e-04
Loss = 2.9847e-01, PNorm = 93.7883, GNorm = 1.1726, lr_0 = 9.6360e-04
Loss = 3.2972e-01, PNorm = 93.9547, GNorm = 0.9589, lr_0 = 9.6294e-04
Loss = 3.3868e-01, PNorm = 94.1358, GNorm = 0.9887, lr_0 = 9.6228e-04
Loss = 3.3597e-01, PNorm = 94.3075, GNorm = 0.9973, lr_0 = 9.6163e-04
Loss = 2.5750e-01, PNorm = 94.4592, GNorm = 0.9311, lr_0 = 9.6097e-04
Loss = 3.1633e-01, PNorm = 94.6153, GNorm = 1.0362, lr_0 = 9.6031e-04
Loss = 3.5787e-01, PNorm = 94.7696, GNorm = 0.8034, lr_0 = 9.5965e-04
Loss = 2.9595e-01, PNorm = 94.9197, GNorm = 0.7982, lr_0 = 9.5899e-04
Loss = 3.4852e-01, PNorm = 95.0795, GNorm = 1.0217, lr_0 = 9.5834e-04
Loss = 3.6762e-01, PNorm = 95.2411, GNorm = 1.5492, lr_0 = 9.5768e-04
Loss = 3.3438e-01, PNorm = 95.4053, GNorm = 1.0107, lr_0 = 9.5702e-04
Loss = 3.1990e-01, PNorm = 95.5865, GNorm = 1.7051, lr_0 = 9.5637e-04
Loss = 3.0317e-01, PNorm = 95.7493, GNorm = 1.7101, lr_0 = 9.5571e-04
Loss = 3.1339e-01, PNorm = 95.9091, GNorm = 0.6039, lr_0 = 9.5506e-04
Loss = 2.9921e-01, PNorm = 96.0745, GNorm = 0.8769, lr_0 = 9.5440e-04
Loss = 2.9414e-01, PNorm = 96.2233, GNorm = 0.6371, lr_0 = 9.5375e-04
Loss = 3.2391e-01, PNorm = 96.3826, GNorm = 1.4683, lr_0 = 9.5310e-04
Loss = 2.8913e-01, PNorm = 96.5291, GNorm = 1.0794, lr_0 = 9.5244e-04
Loss = 3.0957e-01, PNorm = 96.6780, GNorm = 0.8840, lr_0 = 9.5179e-04
Loss = 2.7867e-01, PNorm = 96.8231, GNorm = 1.4005, lr_0 = 9.5114e-04
Loss = 3.5584e-01, PNorm = 96.9722, GNorm = 0.6381, lr_0 = 9.5049e-04
Loss = 3.3080e-01, PNorm = 97.1328, GNorm = 0.9593, lr_0 = 9.4984e-04
Loss = 3.2636e-01, PNorm = 97.2877, GNorm = 0.8794, lr_0 = 9.4919e-04
Loss = 3.1584e-01, PNorm = 97.4464, GNorm = 1.4346, lr_0 = 9.4854e-04
Loss = 3.1096e-01, PNorm = 97.6160, GNorm = 0.8794, lr_0 = 9.4789e-04
Loss = 3.1492e-01, PNorm = 97.7691, GNorm = 1.3800, lr_0 = 9.4724e-04
Loss = 3.5855e-01, PNorm = 97.9222, GNorm = 0.9788, lr_0 = 9.4659e-04
Loss = 2.9686e-01, PNorm = 98.0872, GNorm = 0.9649, lr_0 = 9.4594e-04
Loss = 3.0306e-01, PNorm = 98.2340, GNorm = 0.8571, lr_0 = 9.4529e-04
Loss = 2.9835e-01, PNorm = 98.3903, GNorm = 0.8891, lr_0 = 9.4464e-04
Loss = 3.1235e-01, PNorm = 98.5321, GNorm = 0.9041, lr_0 = 9.4400e-04
Loss = 3.1581e-01, PNorm = 98.6701, GNorm = 0.9871, lr_0 = 9.4335e-04
Loss = 2.9774e-01, PNorm = 98.8157, GNorm = 1.5167, lr_0 = 9.4270e-04
Loss = 3.2773e-01, PNorm = 98.9528, GNorm = 1.7191, lr_0 = 9.4206e-04
Loss = 3.1378e-01, PNorm = 99.1030, GNorm = 1.1552, lr_0 = 9.4141e-04
Loss = 3.0655e-01, PNorm = 99.2662, GNorm = 0.9237, lr_0 = 9.4077e-04
Loss = 3.3819e-01, PNorm = 99.4212, GNorm = 0.8391, lr_0 = 9.4012e-04
Loss = 3.1187e-01, PNorm = 99.6010, GNorm = 1.1698, lr_0 = 9.3948e-04
Loss = 3.5599e-01, PNorm = 99.7675, GNorm = 1.6878, lr_0 = 9.3884e-04
Loss = 3.4677e-01, PNorm = 99.9370, GNorm = 0.9007, lr_0 = 9.3819e-04
Loss = 3.2297e-01, PNorm = 100.1056, GNorm = 0.7559, lr_0 = 9.3755e-04
Loss = 3.1052e-01, PNorm = 100.2506, GNorm = 1.1493, lr_0 = 9.3691e-04
Loss = 2.9146e-01, PNorm = 100.4034, GNorm = 1.3237, lr_0 = 9.3627e-04
Loss = 3.4342e-01, PNorm = 100.5418, GNorm = 0.7762, lr_0 = 9.3562e-04
Loss = 3.7502e-01, PNorm = 100.7107, GNorm = 1.5457, lr_0 = 9.3498e-04
Loss = 3.0613e-01, PNorm = 100.8788, GNorm = 1.0584, lr_0 = 9.3434e-04
Loss = 3.1001e-01, PNorm = 101.0406, GNorm = 0.9381, lr_0 = 9.3370e-04
Loss = 3.4122e-01, PNorm = 101.1940, GNorm = 0.7569, lr_0 = 9.3306e-04
Loss = 3.6297e-01, PNorm = 101.3380, GNorm = 1.6308, lr_0 = 9.3242e-04
Loss = 2.7708e-01, PNorm = 101.4776, GNorm = 1.5588, lr_0 = 9.3178e-04
Loss = 2.9305e-01, PNorm = 101.6154, GNorm = 1.1410, lr_0 = 9.3115e-04
Loss = 3.3983e-01, PNorm = 101.7395, GNorm = 1.4473, lr_0 = 9.3051e-04
Loss = 3.8088e-01, PNorm = 101.8926, GNorm = 1.7502, lr_0 = 9.2987e-04
Loss = 3.7314e-01, PNorm = 102.0393, GNorm = 1.1306, lr_0 = 9.2923e-04
Loss = 3.5314e-01, PNorm = 102.1980, GNorm = 1.5301, lr_0 = 9.2860e-04
Loss = 3.7964e-01, PNorm = 102.3478, GNorm = 0.9531, lr_0 = 9.2796e-04
Loss = 2.9074e-01, PNorm = 102.4902, GNorm = 1.3127, lr_0 = 9.2733e-04
Loss = 3.4284e-01, PNorm = 102.6351, GNorm = 1.2550, lr_0 = 9.2669e-04
Loss = 3.6344e-01, PNorm = 102.7728, GNorm = 1.0070, lr_0 = 9.2606e-04
Loss = 3.6132e-01, PNorm = 102.9256, GNorm = 0.7671, lr_0 = 9.2542e-04
Loss = 3.1279e-01, PNorm = 103.0675, GNorm = 1.0543, lr_0 = 9.2479e-04
Loss = 3.3464e-01, PNorm = 103.2140, GNorm = 1.0299, lr_0 = 9.2415e-04
Loss = 2.8101e-01, PNorm = 103.3487, GNorm = 1.5893, lr_0 = 9.2352e-04
Loss = 3.0380e-01, PNorm = 103.4681, GNorm = 0.9550, lr_0 = 9.2289e-04
Loss = 3.3836e-01, PNorm = 103.6025, GNorm = 1.8718, lr_0 = 9.2226e-04
Loss = 3.2839e-01, PNorm = 103.7437, GNorm = 1.5362, lr_0 = 9.2162e-04
Loss = 3.5509e-01, PNorm = 103.8946, GNorm = 1.2154, lr_0 = 9.2099e-04
Validation mae = 0.127450
Epoch 3
Loss = 2.3014e-01, PNorm = 104.0464, GNorm = 0.9163, lr_0 = 9.2036e-04
Loss = 2.0831e-01, PNorm = 104.1768, GNorm = 0.6228, lr_0 = 9.1973e-04
Loss = 1.7484e-01, PNorm = 104.3040, GNorm = 0.8528, lr_0 = 9.1910e-04
Loss = 1.9758e-01, PNorm = 104.4190, GNorm = 1.0758, lr_0 = 9.1847e-04
Loss = 2.0195e-01, PNorm = 104.5304, GNorm = 1.3991, lr_0 = 9.1784e-04
Loss = 1.9287e-01, PNorm = 104.6505, GNorm = 0.7242, lr_0 = 9.1721e-04
Loss = 1.7384e-01, PNorm = 104.7649, GNorm = 1.2618, lr_0 = 9.1658e-04
Loss = 1.9438e-01, PNorm = 104.8641, GNorm = 0.9233, lr_0 = 9.1596e-04
Loss = 1.9750e-01, PNorm = 104.9636, GNorm = 2.0661, lr_0 = 9.1533e-04
Loss = 1.7482e-01, PNorm = 105.0832, GNorm = 1.0373, lr_0 = 9.1470e-04
Loss = 1.6638e-01, PNorm = 105.1887, GNorm = 0.6168, lr_0 = 9.1408e-04
Loss = 1.7460e-01, PNorm = 105.3127, GNorm = 1.0516, lr_0 = 9.1345e-04
Loss = 1.5594e-01, PNorm = 105.4107, GNorm = 1.6917, lr_0 = 9.1282e-04
Loss = 1.9662e-01, PNorm = 105.5256, GNorm = 0.6758, lr_0 = 9.1220e-04
Loss = 1.9930e-01, PNorm = 105.6249, GNorm = 0.9251, lr_0 = 9.1157e-04
Loss = 1.7882e-01, PNorm = 105.7508, GNorm = 0.7588, lr_0 = 9.1095e-04
Loss = 1.8190e-01, PNorm = 105.8527, GNorm = 0.9816, lr_0 = 9.1032e-04
Loss = 1.9038e-01, PNorm = 105.9715, GNorm = 0.8726, lr_0 = 9.0970e-04
Loss = 2.0427e-01, PNorm = 106.0922, GNorm = 0.8025, lr_0 = 9.0908e-04
Loss = 1.9310e-01, PNorm = 106.2050, GNorm = 0.8647, lr_0 = 9.0846e-04
Loss = 1.6501e-01, PNorm = 106.3211, GNorm = 0.7826, lr_0 = 9.0783e-04
Loss = 2.0911e-01, PNorm = 106.4268, GNorm = 0.7717, lr_0 = 9.0721e-04
Loss = 1.9432e-01, PNorm = 106.5449, GNorm = 0.6780, lr_0 = 9.0659e-04
Loss = 1.7768e-01, PNorm = 106.6525, GNorm = 0.9383, lr_0 = 9.0597e-04
Loss = 2.0418e-01, PNorm = 106.7772, GNorm = 0.7893, lr_0 = 9.0535e-04
Loss = 1.6937e-01, PNorm = 106.8876, GNorm = 0.5584, lr_0 = 9.0473e-04
Loss = 2.3619e-01, PNorm = 107.0077, GNorm = 0.7212, lr_0 = 9.0411e-04
Loss = 2.2507e-01, PNorm = 107.1340, GNorm = 0.5261, lr_0 = 9.0349e-04
Loss = 1.9777e-01, PNorm = 107.2558, GNorm = 1.1693, lr_0 = 9.0287e-04
Loss = 2.2475e-01, PNorm = 107.3773, GNorm = 0.7875, lr_0 = 9.0225e-04
Loss = 1.6919e-01, PNorm = 107.5015, GNorm = 0.5418, lr_0 = 9.0163e-04
Loss = 1.9486e-01, PNorm = 107.5992, GNorm = 0.9358, lr_0 = 9.0102e-04
Loss = 1.7241e-01, PNorm = 107.7231, GNorm = 0.9341, lr_0 = 9.0040e-04
Loss = 1.8331e-01, PNorm = 107.8276, GNorm = 1.2047, lr_0 = 8.9978e-04
Loss = 1.7924e-01, PNorm = 107.9357, GNorm = 1.1772, lr_0 = 8.9916e-04
Loss = 2.1920e-01, PNorm = 108.0450, GNorm = 0.7310, lr_0 = 8.9855e-04
Loss = 1.7157e-01, PNorm = 108.1575, GNorm = 0.7599, lr_0 = 8.9793e-04
Loss = 1.5235e-01, PNorm = 108.2522, GNorm = 1.0788, lr_0 = 8.9732e-04
Loss = 2.0421e-01, PNorm = 108.3522, GNorm = 0.9027, lr_0 = 8.9670e-04
Loss = 1.9461e-01, PNorm = 108.4663, GNorm = 1.1567, lr_0 = 8.9609e-04
Loss = 2.0281e-01, PNorm = 108.5853, GNorm = 1.0690, lr_0 = 8.9548e-04
Loss = 1.8045e-01, PNorm = 108.6966, GNorm = 0.8883, lr_0 = 8.9486e-04
Loss = 1.9125e-01, PNorm = 108.8122, GNorm = 0.8926, lr_0 = 8.9425e-04
Loss = 2.1550e-01, PNorm = 108.9315, GNorm = 0.7471, lr_0 = 8.9364e-04
Loss = 1.5903e-01, PNorm = 109.0521, GNorm = 0.8022, lr_0 = 8.9302e-04
Loss = 2.1937e-01, PNorm = 109.1785, GNorm = 0.9378, lr_0 = 8.9241e-04
Loss = 1.7397e-01, PNorm = 109.3043, GNorm = 0.8733, lr_0 = 8.9180e-04
Loss = 1.5036e-01, PNorm = 109.4194, GNorm = 0.7691, lr_0 = 8.9119e-04
Loss = 1.6142e-01, PNorm = 109.5358, GNorm = 0.6510, lr_0 = 8.9058e-04
Loss = 1.8182e-01, PNorm = 109.6397, GNorm = 1.4224, lr_0 = 8.8997e-04
Loss = 1.9788e-01, PNorm = 109.7571, GNorm = 0.8906, lr_0 = 8.8936e-04
Loss = 1.9130e-01, PNorm = 109.8729, GNorm = 0.7831, lr_0 = 8.8875e-04
Loss = 2.2387e-01, PNorm = 109.9893, GNorm = 1.2224, lr_0 = 8.8814e-04
Loss = 2.2016e-01, PNorm = 110.1117, GNorm = 0.6280, lr_0 = 8.8753e-04
Loss = 2.2179e-01, PNorm = 110.2312, GNorm = 0.9770, lr_0 = 8.8693e-04
Loss = 1.8273e-01, PNorm = 110.3596, GNorm = 0.8956, lr_0 = 8.8632e-04
Loss = 2.1906e-01, PNorm = 110.4779, GNorm = 0.7848, lr_0 = 8.8571e-04
Loss = 2.3699e-01, PNorm = 110.6121, GNorm = 1.5003, lr_0 = 8.8510e-04
Loss = 1.9055e-01, PNorm = 110.7316, GNorm = 1.4557, lr_0 = 8.8450e-04
Loss = 2.1615e-01, PNorm = 110.8560, GNorm = 1.8506, lr_0 = 8.8389e-04
Loss = 1.9711e-01, PNorm = 110.9840, GNorm = 0.8899, lr_0 = 8.8329e-04
Loss = 1.9146e-01, PNorm = 111.1051, GNorm = 0.7724, lr_0 = 8.8268e-04
Loss = 2.0381e-01, PNorm = 111.2281, GNorm = 1.3690, lr_0 = 8.8208e-04
Loss = 1.8113e-01, PNorm = 111.3366, GNorm = 0.8048, lr_0 = 8.8147e-04
Loss = 2.1181e-01, PNorm = 111.4486, GNorm = 0.7359, lr_0 = 8.8087e-04
Loss = 1.7924e-01, PNorm = 111.5686, GNorm = 0.8595, lr_0 = 8.8026e-04
Loss = 1.9411e-01, PNorm = 111.6850, GNorm = 0.9286, lr_0 = 8.7966e-04
Loss = 1.9888e-01, PNorm = 111.7967, GNorm = 1.0018, lr_0 = 8.7906e-04
Loss = 1.9206e-01, PNorm = 111.9137, GNorm = 0.8170, lr_0 = 8.7846e-04
Loss = 2.2671e-01, PNorm = 112.0318, GNorm = 0.7654, lr_0 = 8.7785e-04
Loss = 2.0067e-01, PNorm = 112.1717, GNorm = 0.9494, lr_0 = 8.7725e-04
Loss = 2.2576e-01, PNorm = 112.2937, GNorm = 1.4779, lr_0 = 8.7665e-04
Loss = 1.8687e-01, PNorm = 112.4239, GNorm = 1.0585, lr_0 = 8.7605e-04
Loss = 2.2734e-01, PNorm = 112.5475, GNorm = 0.7778, lr_0 = 8.7545e-04
Loss = 2.2024e-01, PNorm = 112.6644, GNorm = 0.8258, lr_0 = 8.7485e-04
Loss = 2.0206e-01, PNorm = 112.7843, GNorm = 0.6896, lr_0 = 8.7425e-04
Loss = 2.5296e-01, PNorm = 112.8992, GNorm = 1.0797, lr_0 = 8.7365e-04
Loss = 2.1109e-01, PNorm = 113.0134, GNorm = 0.7341, lr_0 = 8.7306e-04
Loss = 2.3597e-01, PNorm = 113.1337, GNorm = 0.8145, lr_0 = 8.7246e-04
Loss = 2.3930e-01, PNorm = 113.2465, GNorm = 0.9115, lr_0 = 8.7186e-04
Loss = 2.1658e-01, PNorm = 113.3740, GNorm = 0.7927, lr_0 = 8.7126e-04
Loss = 1.9209e-01, PNorm = 113.5194, GNorm = 0.6435, lr_0 = 8.7067e-04
Loss = 2.0273e-01, PNorm = 113.6538, GNorm = 0.7164, lr_0 = 8.7007e-04
Loss = 2.0969e-01, PNorm = 113.7898, GNorm = 0.8165, lr_0 = 8.6947e-04
Loss = 2.0907e-01, PNorm = 113.9306, GNorm = 0.8076, lr_0 = 8.6888e-04
Loss = 2.3020e-01, PNorm = 114.0742, GNorm = 1.1392, lr_0 = 8.6828e-04
Loss = 1.7010e-01, PNorm = 114.2090, GNorm = 1.3120, lr_0 = 8.6769e-04
Loss = 2.0258e-01, PNorm = 114.3424, GNorm = 0.8816, lr_0 = 8.6709e-04
Loss = 2.4455e-01, PNorm = 114.4734, GNorm = 0.9148, lr_0 = 8.6650e-04
Loss = 2.1540e-01, PNorm = 114.6081, GNorm = 1.0532, lr_0 = 8.6590e-04
Loss = 1.9853e-01, PNorm = 114.7299, GNorm = 0.8391, lr_0 = 8.6531e-04
Loss = 2.2489e-01, PNorm = 114.8530, GNorm = 0.8348, lr_0 = 8.6472e-04
Loss = 2.2935e-01, PNorm = 114.9855, GNorm = 1.1084, lr_0 = 8.6413e-04
Loss = 2.0127e-01, PNorm = 115.1089, GNorm = 0.8721, lr_0 = 8.6353e-04
Loss = 2.0789e-01, PNorm = 115.2382, GNorm = 0.7346, lr_0 = 8.6294e-04
Loss = 2.5823e-01, PNorm = 115.3603, GNorm = 0.9910, lr_0 = 8.6235e-04
Loss = 2.0353e-01, PNorm = 115.4963, GNorm = 1.0897, lr_0 = 8.6176e-04
Loss = 2.0589e-01, PNorm = 115.6240, GNorm = 1.1007, lr_0 = 8.6117e-04
Loss = 2.0832e-01, PNorm = 115.7458, GNorm = 0.8390, lr_0 = 8.6058e-04
Loss = 2.3646e-01, PNorm = 115.8653, GNorm = 0.9951, lr_0 = 8.5999e-04
Loss = 2.2089e-01, PNorm = 115.9891, GNorm = 0.9725, lr_0 = 8.5940e-04
Loss = 2.5168e-01, PNorm = 116.1111, GNorm = 1.6114, lr_0 = 8.5881e-04
Loss = 2.1271e-01, PNorm = 116.2426, GNorm = 0.9464, lr_0 = 8.5823e-04
Loss = 2.3557e-01, PNorm = 116.3619, GNorm = 1.3406, lr_0 = 8.5764e-04
Loss = 2.0176e-01, PNorm = 116.4767, GNorm = 0.7424, lr_0 = 8.5705e-04
Loss = 2.1310e-01, PNorm = 116.5876, GNorm = 0.7657, lr_0 = 8.5646e-04
Loss = 2.0081e-01, PNorm = 116.6984, GNorm = 0.7989, lr_0 = 8.5588e-04
Loss = 1.9723e-01, PNorm = 116.8055, GNorm = 0.8280, lr_0 = 8.5529e-04
Loss = 2.0227e-01, PNorm = 116.9174, GNorm = 0.6770, lr_0 = 8.5470e-04
Loss = 2.2272e-01, PNorm = 117.0253, GNorm = 0.8487, lr_0 = 8.5412e-04
Loss = 1.8645e-01, PNorm = 117.1374, GNorm = 1.0204, lr_0 = 8.5353e-04
Loss = 2.2674e-01, PNorm = 117.2586, GNorm = 1.4035, lr_0 = 8.5295e-04
Loss = 2.5601e-01, PNorm = 117.3806, GNorm = 1.3591, lr_0 = 8.5236e-04
Loss = 2.0786e-01, PNorm = 117.5053, GNorm = 0.7537, lr_0 = 8.5178e-04
Loss = 2.2299e-01, PNorm = 117.6285, GNorm = 0.6536, lr_0 = 8.5120e-04
Loss = 2.4784e-01, PNorm = 117.7427, GNorm = 1.0257, lr_0 = 8.5061e-04
Loss = 2.3647e-01, PNorm = 117.8759, GNorm = 0.7985, lr_0 = 8.5003e-04
Loss = 2.0436e-01, PNorm = 117.9998, GNorm = 0.8842, lr_0 = 8.4945e-04
Loss = 2.2904e-01, PNorm = 118.1287, GNorm = 0.9062, lr_0 = 8.4887e-04
Loss = 1.9770e-01, PNorm = 118.2517, GNorm = 1.0192, lr_0 = 8.4828e-04
Validation mae = 0.125174
Epoch 4
Loss = 1.2361e-01, PNorm = 118.3629, GNorm = 0.6778, lr_0 = 8.4770e-04
Loss = 1.2989e-01, PNorm = 118.4489, GNorm = 0.6093, lr_0 = 8.4712e-04
Loss = 1.2674e-01, PNorm = 118.5251, GNorm = 0.8384, lr_0 = 8.4654e-04
Loss = 1.1420e-01, PNorm = 118.6062, GNorm = 0.7758, lr_0 = 8.4596e-04
Loss = 1.1695e-01, PNorm = 118.6810, GNorm = 0.8228, lr_0 = 8.4538e-04
Loss = 1.1937e-01, PNorm = 118.7595, GNorm = 0.8885, lr_0 = 8.4480e-04
Loss = 1.0542e-01, PNorm = 118.8258, GNorm = 0.9666, lr_0 = 8.4423e-04
Loss = 1.0666e-01, PNorm = 118.8960, GNorm = 0.9226, lr_0 = 8.4365e-04
Loss = 1.0609e-01, PNorm = 118.9667, GNorm = 0.5423, lr_0 = 8.4307e-04
Loss = 1.1152e-01, PNorm = 119.0368, GNorm = 1.0310, lr_0 = 8.4249e-04
Loss = 1.0023e-01, PNorm = 119.1121, GNorm = 0.6370, lr_0 = 8.4191e-04
Loss = 1.2615e-01, PNorm = 119.1880, GNorm = 0.8256, lr_0 = 8.4134e-04
Loss = 1.2087e-01, PNorm = 119.2636, GNorm = 0.7053, lr_0 = 8.4076e-04
Loss = 1.1711e-01, PNorm = 119.3399, GNorm = 1.5709, lr_0 = 8.4019e-04
Loss = 1.1027e-01, PNorm = 119.4091, GNorm = 0.6775, lr_0 = 8.3961e-04
Loss = 1.0822e-01, PNorm = 119.4882, GNorm = 0.6207, lr_0 = 8.3903e-04
Loss = 1.3039e-01, PNorm = 119.5698, GNorm = 0.5450, lr_0 = 8.3846e-04
Loss = 1.0903e-01, PNorm = 119.6380, GNorm = 0.6875, lr_0 = 8.3789e-04
Loss = 1.0428e-01, PNorm = 119.7139, GNorm = 0.6610, lr_0 = 8.3731e-04
Loss = 1.1060e-01, PNorm = 119.7959, GNorm = 0.7612, lr_0 = 8.3674e-04
Loss = 1.0629e-01, PNorm = 119.8718, GNorm = 0.9007, lr_0 = 8.3616e-04
Loss = 1.0446e-01, PNorm = 119.9523, GNorm = 0.7520, lr_0 = 8.3559e-04
Loss = 1.1141e-01, PNorm = 120.0332, GNorm = 1.0546, lr_0 = 8.3502e-04
Loss = 1.1613e-01, PNorm = 120.1112, GNorm = 0.9443, lr_0 = 8.3445e-04
Loss = 1.1727e-01, PNorm = 120.1893, GNorm = 0.5933, lr_0 = 8.3388e-04
Loss = 1.3735e-01, PNorm = 120.2662, GNorm = 0.7165, lr_0 = 8.3330e-04
Loss = 1.2752e-01, PNorm = 120.3445, GNorm = 0.6393, lr_0 = 8.3273e-04
Loss = 1.1395e-01, PNorm = 120.4187, GNorm = 1.3135, lr_0 = 8.3216e-04
Loss = 1.1937e-01, PNorm = 120.5003, GNorm = 0.6283, lr_0 = 8.3159e-04
Loss = 1.2871e-01, PNorm = 120.5914, GNorm = 0.6728, lr_0 = 8.3102e-04
Loss = 1.2347e-01, PNorm = 120.6824, GNorm = 1.1235, lr_0 = 8.3045e-04
Loss = 1.0378e-01, PNorm = 120.7736, GNorm = 0.5152, lr_0 = 8.2988e-04
Loss = 1.6040e-01, PNorm = 120.8666, GNorm = 0.4243, lr_0 = 8.2932e-04
Loss = 1.1529e-01, PNorm = 120.9534, GNorm = 0.5581, lr_0 = 8.2875e-04
Loss = 1.0887e-01, PNorm = 121.0420, GNorm = 0.7516, lr_0 = 8.2818e-04
Loss = 1.1135e-01, PNorm = 121.1301, GNorm = 0.9027, lr_0 = 8.2761e-04
Loss = 1.1778e-01, PNorm = 121.2163, GNorm = 0.4096, lr_0 = 8.2705e-04
Loss = 1.1765e-01, PNorm = 121.3005, GNorm = 1.2003, lr_0 = 8.2648e-04
Loss = 1.3623e-01, PNorm = 121.3832, GNorm = 1.2578, lr_0 = 8.2591e-04
Loss = 1.0190e-01, PNorm = 121.4748, GNorm = 0.5419, lr_0 = 8.2535e-04
Loss = 1.0643e-01, PNorm = 121.5537, GNorm = 0.5445, lr_0 = 8.2478e-04
Loss = 1.1808e-01, PNorm = 121.6344, GNorm = 0.7522, lr_0 = 8.2422e-04
Loss = 1.4652e-01, PNorm = 121.7309, GNorm = 0.5824, lr_0 = 8.2365e-04
Loss = 1.3990e-01, PNorm = 121.8329, GNorm = 0.9425, lr_0 = 8.2309e-04
Loss = 1.3331e-01, PNorm = 121.9276, GNorm = 0.8892, lr_0 = 8.2252e-04
Loss = 1.3760e-01, PNorm = 122.0280, GNorm = 1.0259, lr_0 = 8.2196e-04
Loss = 1.3123e-01, PNorm = 122.1184, GNorm = 1.0228, lr_0 = 8.2140e-04
Loss = 1.2186e-01, PNorm = 122.2053, GNorm = 1.4675, lr_0 = 8.2084e-04
Loss = 1.3618e-01, PNorm = 122.3012, GNorm = 0.6559, lr_0 = 8.2027e-04
Loss = 1.4049e-01, PNorm = 122.3901, GNorm = 0.5665, lr_0 = 8.1971e-04
Loss = 1.3019e-01, PNorm = 122.4878, GNorm = 0.7709, lr_0 = 8.1915e-04
Loss = 1.4373e-01, PNorm = 122.5823, GNorm = 1.0058, lr_0 = 8.1859e-04
Loss = 1.2021e-01, PNorm = 122.6715, GNorm = 0.6391, lr_0 = 8.1803e-04
Loss = 1.0886e-01, PNorm = 122.7645, GNorm = 0.6934, lr_0 = 8.1747e-04
Loss = 1.3716e-01, PNorm = 122.8585, GNorm = 0.7294, lr_0 = 8.1691e-04
Loss = 1.3485e-01, PNorm = 122.9591, GNorm = 0.8232, lr_0 = 8.1635e-04
Loss = 1.3118e-01, PNorm = 123.0624, GNorm = 0.8013, lr_0 = 8.1579e-04
Loss = 1.3135e-01, PNorm = 123.1563, GNorm = 0.7558, lr_0 = 8.1523e-04
Loss = 1.1465e-01, PNorm = 123.2510, GNorm = 0.6497, lr_0 = 8.1467e-04
Loss = 1.4529e-01, PNorm = 123.3399, GNorm = 0.9683, lr_0 = 8.1411e-04
Loss = 1.3897e-01, PNorm = 123.4338, GNorm = 0.6339, lr_0 = 8.1355e-04
Loss = 1.3299e-01, PNorm = 123.5295, GNorm = 0.7864, lr_0 = 8.1300e-04
Loss = 1.2363e-01, PNorm = 123.6296, GNorm = 0.8612, lr_0 = 8.1244e-04
Loss = 1.5849e-01, PNorm = 123.7138, GNorm = 0.6594, lr_0 = 8.1188e-04
Loss = 1.4935e-01, PNorm = 123.8231, GNorm = 0.6486, lr_0 = 8.1133e-04
Loss = 1.5621e-01, PNorm = 123.9257, GNorm = 0.6924, lr_0 = 8.1077e-04
Loss = 1.2219e-01, PNorm = 124.0281, GNorm = 0.6686, lr_0 = 8.1022e-04
Loss = 1.2933e-01, PNorm = 124.1279, GNorm = 1.4341, lr_0 = 8.0966e-04
Loss = 1.3913e-01, PNorm = 124.2269, GNorm = 0.6213, lr_0 = 8.0911e-04
Loss = 1.2587e-01, PNorm = 124.3294, GNorm = 1.1622, lr_0 = 8.0855e-04
Loss = 1.5221e-01, PNorm = 124.4311, GNorm = 0.6483, lr_0 = 8.0800e-04
Loss = 1.1064e-01, PNorm = 124.5370, GNorm = 0.6020, lr_0 = 8.0745e-04
Loss = 1.1811e-01, PNorm = 124.6351, GNorm = 0.5007, lr_0 = 8.0689e-04
Loss = 1.4363e-01, PNorm = 124.7224, GNorm = 1.0336, lr_0 = 8.0634e-04
Loss = 1.8758e-01, PNorm = 124.8216, GNorm = 1.7930, lr_0 = 8.0579e-04
Loss = 1.3572e-01, PNorm = 124.9166, GNorm = 0.6401, lr_0 = 8.0523e-04
Loss = 1.4833e-01, PNorm = 125.0154, GNorm = 1.0252, lr_0 = 8.0468e-04
Loss = 1.2774e-01, PNorm = 125.1226, GNorm = 0.5133, lr_0 = 8.0413e-04
Loss = 1.2634e-01, PNorm = 125.2177, GNorm = 0.8363, lr_0 = 8.0358e-04
Loss = 1.3114e-01, PNorm = 125.3078, GNorm = 0.6365, lr_0 = 8.0303e-04
Loss = 1.4775e-01, PNorm = 125.4053, GNorm = 0.9239, lr_0 = 8.0248e-04
Loss = 1.3686e-01, PNorm = 125.5082, GNorm = 0.8051, lr_0 = 8.0193e-04
Loss = 1.4037e-01, PNorm = 125.6003, GNorm = 0.7005, lr_0 = 8.0138e-04
Loss = 1.2785e-01, PNorm = 125.7055, GNorm = 0.6695, lr_0 = 8.0083e-04
Loss = 1.3066e-01, PNorm = 125.8107, GNorm = 0.9197, lr_0 = 8.0028e-04
Loss = 1.3733e-01, PNorm = 125.9037, GNorm = 0.6220, lr_0 = 7.9974e-04
Loss = 1.2844e-01, PNorm = 126.0071, GNorm = 0.7847, lr_0 = 7.9919e-04
Loss = 1.4976e-01, PNorm = 126.0988, GNorm = 0.9988, lr_0 = 7.9864e-04
Loss = 1.3524e-01, PNorm = 126.1997, GNorm = 0.6404, lr_0 = 7.9809e-04
Loss = 1.4602e-01, PNorm = 126.3053, GNorm = 1.0175, lr_0 = 7.9755e-04
Loss = 1.5710e-01, PNorm = 126.4066, GNorm = 0.7947, lr_0 = 7.9700e-04
Loss = 1.5738e-01, PNorm = 126.5188, GNorm = 0.7623, lr_0 = 7.9645e-04
Loss = 1.3584e-01, PNorm = 126.6269, GNorm = 0.6607, lr_0 = 7.9591e-04
Loss = 1.4126e-01, PNorm = 126.7361, GNorm = 0.8007, lr_0 = 7.9536e-04
Loss = 1.2879e-01, PNorm = 126.8413, GNorm = 0.7040, lr_0 = 7.9482e-04
Loss = 1.3102e-01, PNorm = 126.9387, GNorm = 0.7260, lr_0 = 7.9427e-04
Loss = 1.4533e-01, PNorm = 127.0393, GNorm = 0.8104, lr_0 = 7.9373e-04
Loss = 1.3634e-01, PNorm = 127.1499, GNorm = 0.6520, lr_0 = 7.9319e-04
Loss = 1.3826e-01, PNorm = 127.2612, GNorm = 0.7344, lr_0 = 7.9264e-04
Loss = 1.4895e-01, PNorm = 127.3647, GNorm = 0.7588, lr_0 = 7.9210e-04
Loss = 1.4027e-01, PNorm = 127.4667, GNorm = 0.9283, lr_0 = 7.9156e-04
Loss = 1.3601e-01, PNorm = 127.5725, GNorm = 0.6207, lr_0 = 7.9101e-04
Loss = 1.4321e-01, PNorm = 127.6665, GNorm = 1.0664, lr_0 = 7.9047e-04
Loss = 1.6963e-01, PNorm = 127.7833, GNorm = 0.9883, lr_0 = 7.8993e-04
Loss = 1.6810e-01, PNorm = 127.8947, GNorm = 1.3494, lr_0 = 7.8939e-04
Loss = 1.4023e-01, PNorm = 128.0090, GNorm = 0.7126, lr_0 = 7.8885e-04
Loss = 1.4529e-01, PNorm = 128.1174, GNorm = 0.5831, lr_0 = 7.8831e-04
Loss = 1.4378e-01, PNorm = 128.2201, GNorm = 0.6868, lr_0 = 7.8777e-04
Loss = 1.3421e-01, PNorm = 128.3244, GNorm = 0.7008, lr_0 = 7.8723e-04
Loss = 1.4881e-01, PNorm = 128.4207, GNorm = 0.7111, lr_0 = 7.8669e-04
Loss = 1.5653e-01, PNorm = 128.5258, GNorm = 1.0838, lr_0 = 7.8615e-04
Loss = 1.3704e-01, PNorm = 128.6195, GNorm = 0.9836, lr_0 = 7.8561e-04
Loss = 1.3446e-01, PNorm = 128.7234, GNorm = 0.5715, lr_0 = 7.8507e-04
Loss = 1.6855e-01, PNorm = 128.8247, GNorm = 0.8313, lr_0 = 7.8454e-04
Loss = 1.5244e-01, PNorm = 128.9234, GNorm = 0.7665, lr_0 = 7.8400e-04
Loss = 1.6295e-01, PNorm = 129.0258, GNorm = 0.7861, lr_0 = 7.8346e-04
Loss = 1.4855e-01, PNorm = 129.1264, GNorm = 1.4554, lr_0 = 7.8293e-04
Loss = 1.1990e-01, PNorm = 129.2255, GNorm = 0.6881, lr_0 = 7.8239e-04
Loss = 1.4299e-01, PNorm = 129.3225, GNorm = 0.6595, lr_0 = 7.8185e-04
Loss = 1.6911e-01, PNorm = 129.4354, GNorm = 0.7515, lr_0 = 7.8132e-04
Validation mae = 0.127385
Epoch 5
Loss = 8.9064e-02, PNorm = 129.5344, GNorm = 0.7335, lr_0 = 7.8078e-04
Loss = 1.1045e-01, PNorm = 129.6205, GNorm = 0.8369, lr_0 = 7.8025e-04
Loss = 8.3157e-02, PNorm = 129.6970, GNorm = 0.8628, lr_0 = 7.7971e-04
Loss = 7.8631e-02, PNorm = 129.7550, GNorm = 0.4785, lr_0 = 7.7918e-04
Loss = 7.5450e-02, PNorm = 129.8180, GNorm = 0.5206, lr_0 = 7.7864e-04
Loss = 7.6674e-02, PNorm = 129.8685, GNorm = 0.5900, lr_0 = 7.7811e-04
Loss = 8.7648e-02, PNorm = 129.9293, GNorm = 0.6430, lr_0 = 7.7758e-04
Loss = 7.2573e-02, PNorm = 129.9881, GNorm = 0.5446, lr_0 = 7.7705e-04
Loss = 8.4567e-02, PNorm = 130.0394, GNorm = 0.4901, lr_0 = 7.7651e-04
Loss = 8.3151e-02, PNorm = 130.0961, GNorm = 0.5470, lr_0 = 7.7598e-04
Loss = 1.0636e-01, PNorm = 130.1536, GNorm = 1.6442, lr_0 = 7.7545e-04
Loss = 7.8013e-02, PNorm = 130.2101, GNorm = 0.6324, lr_0 = 7.7492e-04
Loss = 8.1013e-02, PNorm = 130.2669, GNorm = 0.4002, lr_0 = 7.7439e-04
Loss = 8.6776e-02, PNorm = 130.3254, GNorm = 0.5885, lr_0 = 7.7386e-04
Loss = 1.0339e-01, PNorm = 130.3868, GNorm = 1.5720, lr_0 = 7.7333e-04
Loss = 8.2073e-02, PNorm = 130.4452, GNorm = 0.5442, lr_0 = 7.7280e-04
Loss = 9.3785e-02, PNorm = 130.5034, GNorm = 0.7058, lr_0 = 7.7227e-04
Loss = 9.4892e-02, PNorm = 130.5601, GNorm = 0.7474, lr_0 = 7.7174e-04
Loss = 1.0389e-01, PNorm = 130.6305, GNorm = 1.2210, lr_0 = 7.7121e-04
Loss = 1.0061e-01, PNorm = 130.6802, GNorm = 1.4580, lr_0 = 7.7068e-04
Loss = 7.6093e-02, PNorm = 130.7504, GNorm = 0.5365, lr_0 = 7.7015e-04
Loss = 6.7735e-02, PNorm = 130.8153, GNorm = 0.4391, lr_0 = 7.6963e-04
Loss = 9.6895e-02, PNorm = 130.8743, GNorm = 0.5273, lr_0 = 7.6910e-04
Loss = 7.8286e-02, PNorm = 130.9401, GNorm = 0.6104, lr_0 = 7.6857e-04
Loss = 8.4518e-02, PNorm = 130.9988, GNorm = 1.1241, lr_0 = 7.6805e-04
Loss = 7.9470e-02, PNorm = 131.0597, GNorm = 0.5194, lr_0 = 7.6752e-04
Loss = 8.4184e-02, PNorm = 131.1270, GNorm = 0.8191, lr_0 = 7.6699e-04
Loss = 8.0192e-02, PNorm = 131.1881, GNorm = 0.4780, lr_0 = 7.6647e-04
Loss = 6.6128e-02, PNorm = 131.2497, GNorm = 0.5841, lr_0 = 7.6594e-04
Loss = 9.0510e-02, PNorm = 131.3060, GNorm = 0.4135, lr_0 = 7.6542e-04
Loss = 9.5346e-02, PNorm = 131.3723, GNorm = 0.4769, lr_0 = 7.6489e-04
Loss = 8.1406e-02, PNorm = 131.4361, GNorm = 0.5002, lr_0 = 7.6437e-04
Loss = 7.1149e-02, PNorm = 131.4993, GNorm = 0.7751, lr_0 = 7.6385e-04
Loss = 8.3748e-02, PNorm = 131.5673, GNorm = 0.7259, lr_0 = 7.6332e-04
Loss = 7.4761e-02, PNorm = 131.6268, GNorm = 0.5079, lr_0 = 7.6280e-04
Loss = 7.3539e-02, PNorm = 131.6915, GNorm = 0.5155, lr_0 = 7.6228e-04
Loss = 8.9743e-02, PNorm = 131.7535, GNorm = 0.6445, lr_0 = 7.6176e-04
Loss = 8.8465e-02, PNorm = 131.8225, GNorm = 0.9629, lr_0 = 7.6123e-04
Loss = 9.8463e-02, PNorm = 131.8868, GNorm = 0.5631, lr_0 = 7.6071e-04
Loss = 9.8042e-02, PNorm = 131.9549, GNorm = 0.7777, lr_0 = 7.6019e-04
Loss = 7.8007e-02, PNorm = 132.0302, GNorm = 0.5544, lr_0 = 7.5967e-04
Loss = 8.1365e-02, PNorm = 132.1003, GNorm = 0.5761, lr_0 = 7.5915e-04
Loss = 9.8625e-02, PNorm = 132.1668, GNorm = 0.4223, lr_0 = 7.5863e-04
Loss = 6.8914e-02, PNorm = 132.2261, GNorm = 0.5768, lr_0 = 7.5811e-04
Loss = 9.1756e-02, PNorm = 132.2906, GNorm = 0.9693, lr_0 = 7.5759e-04
Loss = 8.1039e-02, PNorm = 132.3594, GNorm = 0.6020, lr_0 = 7.5707e-04
Loss = 8.7904e-02, PNorm = 132.4250, GNorm = 0.5244, lr_0 = 7.5655e-04
Loss = 8.2191e-02, PNorm = 132.4892, GNorm = 0.7739, lr_0 = 7.5603e-04
Loss = 8.7835e-02, PNorm = 132.5573, GNorm = 0.6562, lr_0 = 7.5552e-04
Loss = 1.0183e-01, PNorm = 132.6282, GNorm = 0.5263, lr_0 = 7.5500e-04
Loss = 9.2132e-02, PNorm = 132.6945, GNorm = 0.8183, lr_0 = 7.5448e-04
Loss = 8.8996e-02, PNorm = 132.7712, GNorm = 0.6667, lr_0 = 7.5397e-04
Loss = 8.8297e-02, PNorm = 132.8493, GNorm = 0.5548, lr_0 = 7.5345e-04
Loss = 8.8053e-02, PNorm = 132.9189, GNorm = 0.5594, lr_0 = 7.5293e-04
Loss = 8.2495e-02, PNorm = 132.9886, GNorm = 0.4782, lr_0 = 7.5242e-04
Loss = 8.9395e-02, PNorm = 133.0560, GNorm = 0.5772, lr_0 = 7.5190e-04
Loss = 9.7220e-02, PNorm = 133.1371, GNorm = 0.5056, lr_0 = 7.5139e-04
Loss = 8.7931e-02, PNorm = 133.2181, GNorm = 0.6581, lr_0 = 7.5087e-04
Loss = 9.9080e-02, PNorm = 133.2966, GNorm = 0.4868, lr_0 = 7.5036e-04
Loss = 9.1095e-02, PNorm = 133.3737, GNorm = 0.8617, lr_0 = 7.4984e-04
Loss = 7.9228e-02, PNorm = 133.4542, GNorm = 1.2368, lr_0 = 7.4933e-04
Loss = 8.5344e-02, PNorm = 133.5231, GNorm = 0.8700, lr_0 = 7.4882e-04
Loss = 7.6409e-02, PNorm = 133.5944, GNorm = 0.6288, lr_0 = 7.4830e-04
Loss = 1.0180e-01, PNorm = 133.6725, GNorm = 0.8587, lr_0 = 7.4779e-04
Loss = 7.4281e-02, PNorm = 133.7453, GNorm = 0.4733, lr_0 = 7.4728e-04
Loss = 7.5165e-02, PNorm = 133.8241, GNorm = 0.8495, lr_0 = 7.4677e-04
Loss = 7.4059e-02, PNorm = 133.8911, GNorm = 0.8007, lr_0 = 7.4625e-04
Loss = 8.8760e-02, PNorm = 133.9668, GNorm = 0.6635, lr_0 = 7.4574e-04
Loss = 1.0153e-01, PNorm = 134.0422, GNorm = 0.8564, lr_0 = 7.4523e-04
Loss = 8.7764e-02, PNorm = 134.1198, GNorm = 0.8792, lr_0 = 7.4472e-04
Loss = 8.7604e-02, PNorm = 134.1966, GNorm = 0.6697, lr_0 = 7.4421e-04
Loss = 9.4806e-02, PNorm = 134.2731, GNorm = 0.6696, lr_0 = 7.4370e-04
Loss = 1.0713e-01, PNorm = 134.3568, GNorm = 0.5560, lr_0 = 7.4319e-04
Loss = 7.7723e-02, PNorm = 134.4368, GNorm = 0.7398, lr_0 = 7.4268e-04
Loss = 8.4909e-02, PNorm = 134.5006, GNorm = 0.7690, lr_0 = 7.4217e-04
Loss = 8.6151e-02, PNorm = 134.5736, GNorm = 0.7741, lr_0 = 7.4167e-04
Loss = 9.5755e-02, PNorm = 134.6501, GNorm = 1.3166, lr_0 = 7.4116e-04
Loss = 9.8502e-02, PNorm = 134.7436, GNorm = 1.2321, lr_0 = 7.4065e-04
Loss = 8.4882e-02, PNorm = 134.8261, GNorm = 0.4330, lr_0 = 7.4014e-04
Loss = 1.0802e-01, PNorm = 134.9161, GNorm = 0.7893, lr_0 = 7.3964e-04
Loss = 1.0451e-01, PNorm = 134.9990, GNorm = 0.5172, lr_0 = 7.3913e-04
Loss = 9.1559e-02, PNorm = 135.0865, GNorm = 0.6125, lr_0 = 7.3862e-04
Loss = 9.3700e-02, PNorm = 135.1755, GNorm = 0.6614, lr_0 = 7.3812e-04
Loss = 7.5561e-02, PNorm = 135.2601, GNorm = 0.4217, lr_0 = 7.3761e-04
Loss = 1.1636e-01, PNorm = 135.3444, GNorm = 0.8853, lr_0 = 7.3711e-04
Loss = 8.4344e-02, PNorm = 135.4335, GNorm = 0.4707, lr_0 = 7.3660e-04
Loss = 1.0757e-01, PNorm = 135.5287, GNorm = 0.9611, lr_0 = 7.3610e-04
Loss = 9.4165e-02, PNorm = 135.6256, GNorm = 0.7183, lr_0 = 7.3559e-04
Loss = 9.8992e-02, PNorm = 135.7165, GNorm = 0.6959, lr_0 = 7.3509e-04
Loss = 7.9510e-02, PNorm = 135.7985, GNorm = 0.4504, lr_0 = 7.3458e-04
Loss = 9.7216e-02, PNorm = 135.8802, GNorm = 0.6449, lr_0 = 7.3408e-04
Loss = 1.1005e-01, PNorm = 135.9700, GNorm = 0.6500, lr_0 = 7.3358e-04
Loss = 1.1028e-01, PNorm = 136.0645, GNorm = 1.4441, lr_0 = 7.3308e-04
Loss = 1.0914e-01, PNorm = 136.1519, GNorm = 0.7088, lr_0 = 7.3257e-04
Loss = 8.8284e-02, PNorm = 136.2423, GNorm = 0.3924, lr_0 = 7.3207e-04
Loss = 1.0184e-01, PNorm = 136.3230, GNorm = 0.6500, lr_0 = 7.3157e-04
Loss = 8.2118e-02, PNorm = 136.4042, GNorm = 0.4493, lr_0 = 7.3107e-04
Loss = 1.0695e-01, PNorm = 136.4862, GNorm = 0.6634, lr_0 = 7.3057e-04
Loss = 1.1189e-01, PNorm = 136.5737, GNorm = 0.5604, lr_0 = 7.3007e-04
Loss = 9.9791e-02, PNorm = 136.6575, GNorm = 0.5041, lr_0 = 7.2957e-04
Loss = 9.6494e-02, PNorm = 136.7413, GNorm = 0.7902, lr_0 = 7.2907e-04
Loss = 1.0179e-01, PNorm = 136.8294, GNorm = 1.4729, lr_0 = 7.2857e-04
Loss = 7.5078e-02, PNorm = 136.9141, GNorm = 1.0175, lr_0 = 7.2807e-04
Loss = 9.4667e-02, PNorm = 136.9982, GNorm = 0.9499, lr_0 = 7.2757e-04
Loss = 1.0119e-01, PNorm = 137.0743, GNorm = 0.8279, lr_0 = 7.2707e-04
Loss = 1.0721e-01, PNorm = 137.1606, GNorm = 1.3057, lr_0 = 7.2657e-04
Loss = 8.2714e-02, PNorm = 137.2405, GNorm = 0.4090, lr_0 = 7.2608e-04
Loss = 1.0374e-01, PNorm = 137.3247, GNorm = 0.6267, lr_0 = 7.2558e-04
Loss = 9.1855e-02, PNorm = 137.4088, GNorm = 0.6420, lr_0 = 7.2508e-04
Loss = 1.0070e-01, PNorm = 137.4880, GNorm = 1.0265, lr_0 = 7.2458e-04
Loss = 9.4715e-02, PNorm = 137.5680, GNorm = 0.7855, lr_0 = 7.2409e-04
Loss = 8.8771e-02, PNorm = 137.6483, GNorm = 0.8919, lr_0 = 7.2359e-04
Loss = 9.0150e-02, PNorm = 137.7278, GNorm = 1.0394, lr_0 = 7.2310e-04
Loss = 9.4234e-02, PNorm = 137.8137, GNorm = 0.7963, lr_0 = 7.2260e-04
Loss = 9.8632e-02, PNorm = 137.8963, GNorm = 0.4886, lr_0 = 7.2211e-04
Loss = 9.6388e-02, PNorm = 137.9816, GNorm = 0.6435, lr_0 = 7.2161e-04
Loss = 9.9375e-02, PNorm = 138.0582, GNorm = 0.8271, lr_0 = 7.2112e-04
Loss = 1.0185e-01, PNorm = 138.1410, GNorm = 0.7935, lr_0 = 7.2062e-04
Loss = 9.7839e-02, PNorm = 138.2302, GNorm = 0.7338, lr_0 = 7.2013e-04
Loss = 1.0051e-01, PNorm = 138.3184, GNorm = 0.8605, lr_0 = 7.1964e-04
Validation mae = 0.124575
Epoch 6
Loss = 6.4588e-02, PNorm = 138.3927, GNorm = 0.6643, lr_0 = 7.1914e-04
Loss = 5.4198e-02, PNorm = 138.4595, GNorm = 0.4839, lr_0 = 7.1865e-04
Loss = 5.6675e-02, PNorm = 138.5082, GNorm = 0.6634, lr_0 = 7.1816e-04
Loss = 5.9547e-02, PNorm = 138.5510, GNorm = 0.4689, lr_0 = 7.1767e-04
Loss = 7.6918e-02, PNorm = 138.5970, GNorm = 0.3803, lr_0 = 7.1717e-04
Loss = 5.7921e-02, PNorm = 138.6460, GNorm = 0.5239, lr_0 = 7.1668e-04
Loss = 6.9683e-02, PNorm = 138.6985, GNorm = 0.6048, lr_0 = 7.1619e-04
Loss = 6.5380e-02, PNorm = 138.7524, GNorm = 0.5575, lr_0 = 7.1570e-04
Loss = 6.4753e-02, PNorm = 138.8063, GNorm = 0.6976, lr_0 = 7.1521e-04
Loss = 6.8524e-02, PNorm = 138.8546, GNorm = 0.5941, lr_0 = 7.1472e-04
Loss = 5.7831e-02, PNorm = 138.9073, GNorm = 0.4352, lr_0 = 7.1423e-04
Loss = 5.7756e-02, PNorm = 138.9550, GNorm = 0.4834, lr_0 = 7.1374e-04
Loss = 6.8571e-02, PNorm = 139.0014, GNorm = 0.9313, lr_0 = 7.1325e-04
Loss = 5.9403e-02, PNorm = 139.0581, GNorm = 0.7718, lr_0 = 7.1277e-04
Loss = 5.3012e-02, PNorm = 139.1124, GNorm = 0.6528, lr_0 = 7.1228e-04
Loss = 4.9767e-02, PNorm = 139.1660, GNorm = 0.6459, lr_0 = 7.1179e-04
Loss = 5.8105e-02, PNorm = 139.2153, GNorm = 0.5798, lr_0 = 7.1130e-04
Loss = 6.0270e-02, PNorm = 139.2604, GNorm = 0.6103, lr_0 = 7.1081e-04
Loss = 5.3258e-02, PNorm = 139.3110, GNorm = 0.3976, lr_0 = 7.1033e-04
Loss = 6.8221e-02, PNorm = 139.3671, GNorm = 1.6200, lr_0 = 7.0984e-04
Loss = 6.7561e-02, PNorm = 139.4208, GNorm = 0.4987, lr_0 = 7.0935e-04
Loss = 7.5650e-02, PNorm = 139.4759, GNorm = 0.4165, lr_0 = 7.0887e-04
Loss = 5.7509e-02, PNorm = 139.5289, GNorm = 0.6337, lr_0 = 7.0838e-04
Loss = 6.6670e-02, PNorm = 139.5767, GNorm = 0.4949, lr_0 = 7.0790e-04
Loss = 5.3824e-02, PNorm = 139.6254, GNorm = 0.5647, lr_0 = 7.0741e-04
Loss = 5.7535e-02, PNorm = 139.6780, GNorm = 0.5348, lr_0 = 7.0693e-04
Loss = 5.4456e-02, PNorm = 139.7275, GNorm = 0.4075, lr_0 = 7.0644e-04
Loss = 5.7390e-02, PNorm = 139.7779, GNorm = 0.3277, lr_0 = 7.0596e-04
Loss = 5.4545e-02, PNorm = 139.8275, GNorm = 0.7888, lr_0 = 7.0548e-04
Loss = 5.0738e-02, PNorm = 139.8790, GNorm = 0.6591, lr_0 = 7.0499e-04
Loss = 5.5790e-02, PNorm = 139.9196, GNorm = 0.5372, lr_0 = 7.0451e-04
Loss = 7.0928e-02, PNorm = 139.9659, GNorm = 0.4804, lr_0 = 7.0403e-04
Loss = 6.1099e-02, PNorm = 140.0195, GNorm = 0.5950, lr_0 = 7.0354e-04
Loss = 5.8796e-02, PNorm = 140.0719, GNorm = 0.5207, lr_0 = 7.0306e-04
Loss = 6.1907e-02, PNorm = 140.1224, GNorm = 0.8167, lr_0 = 7.0258e-04
Loss = 6.5799e-02, PNorm = 140.1758, GNorm = 0.5177, lr_0 = 7.0210e-04
Loss = 6.1416e-02, PNorm = 140.2266, GNorm = 0.4232, lr_0 = 7.0162e-04
Loss = 6.4548e-02, PNorm = 140.2798, GNorm = 0.3984, lr_0 = 7.0114e-04
Loss = 6.0526e-02, PNorm = 140.3374, GNorm = 0.6406, lr_0 = 7.0066e-04
Loss = 5.3258e-02, PNorm = 140.3900, GNorm = 0.4571, lr_0 = 7.0018e-04
Loss = 6.0486e-02, PNorm = 140.4379, GNorm = 0.5086, lr_0 = 6.9970e-04
Loss = 6.1506e-02, PNorm = 140.4928, GNorm = 0.8192, lr_0 = 6.9922e-04
Loss = 5.9711e-02, PNorm = 140.5498, GNorm = 0.4479, lr_0 = 6.9874e-04
Loss = 5.5757e-02, PNorm = 140.6079, GNorm = 0.4631, lr_0 = 6.9826e-04
Loss = 7.0975e-02, PNorm = 140.6628, GNorm = 0.5621, lr_0 = 6.9778e-04
Loss = 6.4302e-02, PNorm = 140.7207, GNorm = 0.3817, lr_0 = 6.9730e-04
Loss = 5.6030e-02, PNorm = 140.7815, GNorm = 0.5233, lr_0 = 6.9683e-04
Loss = 6.5273e-02, PNorm = 140.8378, GNorm = 0.7377, lr_0 = 6.9635e-04
Loss = 7.1761e-02, PNorm = 140.8988, GNorm = 0.3575, lr_0 = 6.9587e-04
Loss = 5.8178e-02, PNorm = 140.9578, GNorm = 0.4717, lr_0 = 6.9540e-04
Loss = 5.4990e-02, PNorm = 141.0139, GNorm = 0.6355, lr_0 = 6.9492e-04
Loss = 6.1121e-02, PNorm = 141.0667, GNorm = 0.4727, lr_0 = 6.9444e-04
Loss = 5.3638e-02, PNorm = 141.1247, GNorm = 0.4169, lr_0 = 6.9397e-04
Loss = 6.1218e-02, PNorm = 141.1784, GNorm = 0.4077, lr_0 = 6.9349e-04
Loss = 7.0163e-02, PNorm = 141.2367, GNorm = 1.4483, lr_0 = 6.9302e-04
Loss = 6.9480e-02, PNorm = 141.2964, GNorm = 0.5929, lr_0 = 6.9254e-04
Loss = 6.2881e-02, PNorm = 141.3560, GNorm = 0.5476, lr_0 = 6.9207e-04
Loss = 7.5296e-02, PNorm = 141.4203, GNorm = 0.3935, lr_0 = 6.9159e-04
Loss = 6.1332e-02, PNorm = 141.4776, GNorm = 0.3932, lr_0 = 6.9112e-04
Loss = 7.1264e-02, PNorm = 141.5418, GNorm = 0.4230, lr_0 = 6.9065e-04
Loss = 6.6868e-02, PNorm = 141.6121, GNorm = 0.4177, lr_0 = 6.9017e-04
Loss = 6.4545e-02, PNorm = 141.6867, GNorm = 0.6171, lr_0 = 6.8970e-04
Loss = 5.3340e-02, PNorm = 141.7560, GNorm = 0.9138, lr_0 = 6.8923e-04
Loss = 4.8096e-02, PNorm = 141.8203, GNorm = 0.3884, lr_0 = 6.8876e-04
Loss = 6.2542e-02, PNorm = 141.8764, GNorm = 0.7466, lr_0 = 6.8828e-04
Loss = 5.3704e-02, PNorm = 141.9357, GNorm = 0.5476, lr_0 = 6.8781e-04
Loss = 7.3308e-02, PNorm = 141.9967, GNorm = 0.3917, lr_0 = 6.8734e-04
Loss = 7.0570e-02, PNorm = 142.0543, GNorm = 0.9413, lr_0 = 6.8687e-04
Loss = 6.7025e-02, PNorm = 142.1205, GNorm = 0.5692, lr_0 = 6.8640e-04
Loss = 7.4931e-02, PNorm = 142.1986, GNorm = 0.8402, lr_0 = 6.8593e-04
Loss = 7.3076e-02, PNorm = 142.2765, GNorm = 0.4660, lr_0 = 6.8546e-04
Loss = 7.8983e-02, PNorm = 142.3564, GNorm = 1.3411, lr_0 = 6.8499e-04
Loss = 5.7486e-02, PNorm = 142.4368, GNorm = 0.5569, lr_0 = 6.8452e-04
Loss = 5.8088e-02, PNorm = 142.5089, GNorm = 0.3835, lr_0 = 6.8405e-04
Loss = 7.3453e-02, PNorm = 142.5746, GNorm = 0.6634, lr_0 = 6.8358e-04
Loss = 6.4142e-02, PNorm = 142.6412, GNorm = 0.4667, lr_0 = 6.8312e-04
Loss = 6.2687e-02, PNorm = 142.7065, GNorm = 0.5793, lr_0 = 6.8265e-04
Loss = 7.9348e-02, PNorm = 142.7730, GNorm = 0.5336, lr_0 = 6.8218e-04
Loss = 7.7407e-02, PNorm = 142.8349, GNorm = 0.8124, lr_0 = 6.8171e-04
Loss = 7.0793e-02, PNorm = 142.9036, GNorm = 0.5650, lr_0 = 6.8125e-04
Loss = 5.6092e-02, PNorm = 142.9774, GNorm = 0.6966, lr_0 = 6.8078e-04
Loss = 6.6824e-02, PNorm = 143.0377, GNorm = 0.7332, lr_0 = 6.8031e-04
Loss = 6.5834e-02, PNorm = 143.1106, GNorm = 0.4655, lr_0 = 6.7985e-04
Loss = 6.4169e-02, PNorm = 143.1724, GNorm = 0.7884, lr_0 = 6.7938e-04
Loss = 5.8485e-02, PNorm = 143.2439, GNorm = 0.4118, lr_0 = 6.7892e-04
Loss = 7.7198e-02, PNorm = 143.3085, GNorm = 0.8829, lr_0 = 6.7845e-04
Loss = 5.7815e-02, PNorm = 143.3823, GNorm = 0.2936, lr_0 = 6.7799e-04
Loss = 6.1855e-02, PNorm = 143.4556, GNorm = 1.0741, lr_0 = 6.7752e-04
Loss = 6.6217e-02, PNorm = 143.5216, GNorm = 0.3728, lr_0 = 6.7706e-04
Loss = 6.0279e-02, PNorm = 143.5927, GNorm = 0.5173, lr_0 = 6.7659e-04
Loss = 8.0534e-02, PNorm = 143.6604, GNorm = 0.4315, lr_0 = 6.7613e-04
Loss = 5.4743e-02, PNorm = 143.7308, GNorm = 0.4660, lr_0 = 6.7567e-04
Loss = 5.6308e-02, PNorm = 143.7986, GNorm = 0.4375, lr_0 = 6.7520e-04
Loss = 6.4429e-02, PNorm = 143.8556, GNorm = 0.9291, lr_0 = 6.7474e-04
Loss = 6.2742e-02, PNorm = 143.9107, GNorm = 0.6795, lr_0 = 6.7428e-04
Loss = 7.4855e-02, PNorm = 143.9803, GNorm = 1.1543, lr_0 = 6.7382e-04
Loss = 6.4593e-02, PNorm = 144.0433, GNorm = 0.5469, lr_0 = 6.7335e-04
Loss = 7.1984e-02, PNorm = 144.1152, GNorm = 1.1265, lr_0 = 6.7289e-04
Loss = 8.8682e-02, PNorm = 144.1923, GNorm = 0.5702, lr_0 = 6.7243e-04
Loss = 6.9501e-02, PNorm = 144.2664, GNorm = 0.4509, lr_0 = 6.7197e-04
Loss = 6.5502e-02, PNorm = 144.3455, GNorm = 0.5906, lr_0 = 6.7151e-04
Loss = 7.9912e-02, PNorm = 144.4168, GNorm = 0.5148, lr_0 = 6.7105e-04
Loss = 8.2125e-02, PNorm = 144.4864, GNorm = 1.0448, lr_0 = 6.7059e-04
Loss = 8.1664e-02, PNorm = 144.5638, GNorm = 0.9511, lr_0 = 6.7013e-04
Loss = 7.6480e-02, PNorm = 144.6439, GNorm = 0.4450, lr_0 = 6.6967e-04
Loss = 5.9637e-02, PNorm = 144.7187, GNorm = 0.6595, lr_0 = 6.6921e-04
Loss = 8.4378e-02, PNorm = 144.7907, GNorm = 0.5131, lr_0 = 6.6876e-04
Loss = 6.2098e-02, PNorm = 144.8641, GNorm = 0.6288, lr_0 = 6.6830e-04
Loss = 8.0871e-02, PNorm = 144.9362, GNorm = 0.5577, lr_0 = 6.6784e-04
Loss = 6.0837e-02, PNorm = 145.0088, GNorm = 0.4849, lr_0 = 6.6738e-04
Loss = 6.4854e-02, PNorm = 145.0757, GNorm = 0.5914, lr_0 = 6.6693e-04
Loss = 7.8960e-02, PNorm = 145.1458, GNorm = 0.4077, lr_0 = 6.6647e-04
Loss = 6.2232e-02, PNorm = 145.2190, GNorm = 0.3556, lr_0 = 6.6601e-04
Loss = 6.1485e-02, PNorm = 145.2840, GNorm = 0.5759, lr_0 = 6.6556e-04
Loss = 6.3002e-02, PNorm = 145.3482, GNorm = 0.4925, lr_0 = 6.6510e-04
Loss = 7.5376e-02, PNorm = 145.4195, GNorm = 0.8111, lr_0 = 6.6464e-04
Loss = 7.5084e-02, PNorm = 145.4963, GNorm = 0.7088, lr_0 = 6.6419e-04
Loss = 7.7290e-02, PNorm = 145.5692, GNorm = 0.4201, lr_0 = 6.6373e-04
Loss = 7.7748e-02, PNorm = 145.6375, GNorm = 0.5412, lr_0 = 6.6328e-04
Loss = 6.9395e-02, PNorm = 145.7133, GNorm = 0.6547, lr_0 = 6.6282e-04
Validation mae = 0.124096
Epoch 7
Loss = 5.6572e-02, PNorm = 145.7706, GNorm = 0.6697, lr_0 = 6.6237e-04
Loss = 5.8995e-02, PNorm = 145.8249, GNorm = 0.7244, lr_0 = 6.6192e-04
Loss = 5.4145e-02, PNorm = 145.8746, GNorm = 0.5195, lr_0 = 6.6146e-04
Loss = 6.0861e-02, PNorm = 145.9225, GNorm = 0.5640, lr_0 = 6.6101e-04
Loss = 4.7302e-02, PNorm = 145.9715, GNorm = 0.2934, lr_0 = 6.6056e-04
Loss = 4.5643e-02, PNorm = 146.0142, GNorm = 0.4188, lr_0 = 6.6011e-04
Loss = 4.4448e-02, PNorm = 146.0593, GNorm = 0.3998, lr_0 = 6.5965e-04
Loss = 4.4242e-02, PNorm = 146.1014, GNorm = 0.5722, lr_0 = 6.5920e-04
Loss = 4.6674e-02, PNorm = 146.1404, GNorm = 0.4907, lr_0 = 6.5875e-04
Loss = 5.2892e-02, PNorm = 146.1802, GNorm = 0.6143, lr_0 = 6.5830e-04
Loss = 5.9708e-02, PNorm = 146.2191, GNorm = 0.8210, lr_0 = 6.5785e-04
Loss = 5.1926e-02, PNorm = 146.2730, GNorm = 0.7390, lr_0 = 6.5740e-04
Loss = 5.6874e-02, PNorm = 146.3218, GNorm = 0.5048, lr_0 = 6.5695e-04
Loss = 5.1828e-02, PNorm = 146.3666, GNorm = 0.4429, lr_0 = 6.5650e-04
Loss = 4.7983e-02, PNorm = 146.4105, GNorm = 0.5488, lr_0 = 6.5605e-04
Loss = 5.5081e-02, PNorm = 146.4522, GNorm = 0.6069, lr_0 = 6.5560e-04
Loss = 4.4202e-02, PNorm = 146.5017, GNorm = 0.4764, lr_0 = 6.5515e-04
Loss = 4.1108e-02, PNorm = 146.5547, GNorm = 0.6687, lr_0 = 6.5470e-04
Loss = 5.4450e-02, PNorm = 146.6010, GNorm = 0.5331, lr_0 = 6.5425e-04
Loss = 6.1886e-02, PNorm = 146.6447, GNorm = 0.2913, lr_0 = 6.5380e-04
Loss = 5.2394e-02, PNorm = 146.6921, GNorm = 0.7189, lr_0 = 6.5335e-04
Loss = 4.7498e-02, PNorm = 146.7426, GNorm = 0.9344, lr_0 = 6.5291e-04
Loss = 4.5634e-02, PNorm = 146.7865, GNorm = 0.3501, lr_0 = 6.5246e-04
Loss = 5.5857e-02, PNorm = 146.8303, GNorm = 0.6555, lr_0 = 6.5201e-04
Loss = 5.3771e-02, PNorm = 146.8747, GNorm = 0.7964, lr_0 = 6.5157e-04
Loss = 5.3905e-02, PNorm = 146.9252, GNorm = 0.3830, lr_0 = 6.5112e-04
Loss = 4.2266e-02, PNorm = 146.9742, GNorm = 0.3473, lr_0 = 6.5067e-04
Loss = 5.1858e-02, PNorm = 147.0272, GNorm = 0.7496, lr_0 = 6.5023e-04
Loss = 3.9876e-02, PNorm = 147.0804, GNorm = 0.2485, lr_0 = 6.4978e-04
Loss = 5.2923e-02, PNorm = 147.1306, GNorm = 0.9798, lr_0 = 6.4934e-04
Loss = 4.0874e-02, PNorm = 147.1792, GNorm = 0.5068, lr_0 = 6.4889e-04
Loss = 4.6736e-02, PNorm = 147.2300, GNorm = 0.6413, lr_0 = 6.4845e-04
Loss = 4.5938e-02, PNorm = 147.2784, GNorm = 0.5462, lr_0 = 6.4800e-04
Loss = 4.4309e-02, PNorm = 147.3227, GNorm = 0.4252, lr_0 = 6.4756e-04
Loss = 4.0789e-02, PNorm = 147.3676, GNorm = 0.3731, lr_0 = 6.4712e-04
Loss = 6.5190e-02, PNorm = 147.4100, GNorm = 0.3613, lr_0 = 6.4667e-04
Loss = 5.1921e-02, PNorm = 147.4601, GNorm = 0.6963, lr_0 = 6.4623e-04
Loss = 4.0550e-02, PNorm = 147.5129, GNorm = 0.3817, lr_0 = 6.4579e-04
Loss = 5.8123e-02, PNorm = 147.5594, GNorm = 0.8892, lr_0 = 6.4534e-04
Loss = 5.0191e-02, PNorm = 147.6124, GNorm = 0.6460, lr_0 = 6.4490e-04
Loss = 4.1312e-02, PNorm = 147.6640, GNorm = 0.5391, lr_0 = 6.4446e-04
Loss = 4.2519e-02, PNorm = 147.7138, GNorm = 0.3112, lr_0 = 6.4402e-04
Loss = 4.6774e-02, PNorm = 147.7650, GNorm = 0.4141, lr_0 = 6.4358e-04
Loss = 4.7498e-02, PNorm = 147.8134, GNorm = 0.3168, lr_0 = 6.4314e-04
Loss = 4.1469e-02, PNorm = 147.8630, GNorm = 0.5532, lr_0 = 6.4270e-04
Loss = 5.3785e-02, PNorm = 147.9114, GNorm = 0.4779, lr_0 = 6.4226e-04
Loss = 5.2366e-02, PNorm = 147.9660, GNorm = 0.6562, lr_0 = 6.4182e-04
Loss = 3.9175e-02, PNorm = 148.0212, GNorm = 0.4960, lr_0 = 6.4138e-04
Loss = 4.6837e-02, PNorm = 148.0730, GNorm = 0.6457, lr_0 = 6.4094e-04
Loss = 4.1345e-02, PNorm = 148.1215, GNorm = 0.8431, lr_0 = 6.4050e-04
Loss = 4.3659e-02, PNorm = 148.1663, GNorm = 0.4389, lr_0 = 6.4006e-04
Loss = 5.4126e-02, PNorm = 148.2181, GNorm = 0.5083, lr_0 = 6.3962e-04
Loss = 4.0405e-02, PNorm = 148.2681, GNorm = 0.2720, lr_0 = 6.3918e-04
Loss = 4.4412e-02, PNorm = 148.3209, GNorm = 0.4831, lr_0 = 6.3874e-04
Loss = 4.6456e-02, PNorm = 148.3783, GNorm = 0.7372, lr_0 = 6.3831e-04
Loss = 3.8509e-02, PNorm = 148.4255, GNorm = 0.6184, lr_0 = 6.3787e-04
Loss = 5.4123e-02, PNorm = 148.4748, GNorm = 0.4412, lr_0 = 6.3743e-04
Loss = 6.2966e-02, PNorm = 148.5229, GNorm = 2.1733, lr_0 = 6.3700e-04
Loss = 4.3460e-02, PNorm = 148.5766, GNorm = 0.5763, lr_0 = 6.3656e-04
Loss = 4.1193e-02, PNorm = 148.6347, GNorm = 0.2546, lr_0 = 6.3612e-04
Loss = 3.9709e-02, PNorm = 148.6851, GNorm = 0.5377, lr_0 = 6.3569e-04
Loss = 4.4677e-02, PNorm = 148.7307, GNorm = 0.3579, lr_0 = 6.3525e-04
Loss = 4.9942e-02, PNorm = 148.7795, GNorm = 0.5475, lr_0 = 6.3482e-04
Loss = 4.3217e-02, PNorm = 148.8307, GNorm = 0.6988, lr_0 = 6.3438e-04
Loss = 4.8833e-02, PNorm = 148.8790, GNorm = 0.6343, lr_0 = 6.3395e-04
Loss = 4.7330e-02, PNorm = 148.9310, GNorm = 0.4965, lr_0 = 6.3351e-04
Loss = 5.2750e-02, PNorm = 148.9945, GNorm = 0.6223, lr_0 = 6.3308e-04
Loss = 5.4697e-02, PNorm = 149.0510, GNorm = 0.3665, lr_0 = 6.3265e-04
Loss = 5.0838e-02, PNorm = 149.1091, GNorm = 0.6725, lr_0 = 6.3221e-04
Loss = 4.3818e-02, PNorm = 149.1612, GNorm = 0.5481, lr_0 = 6.3178e-04
Loss = 5.0347e-02, PNorm = 149.2135, GNorm = 0.4468, lr_0 = 6.3135e-04
Loss = 5.3548e-02, PNorm = 149.2744, GNorm = 0.6097, lr_0 = 6.3091e-04
Loss = 7.0240e-02, PNorm = 149.3419, GNorm = 0.3948, lr_0 = 6.3048e-04
Loss = 5.3856e-02, PNorm = 149.4127, GNorm = 0.4256, lr_0 = 6.3005e-04
Loss = 5.0560e-02, PNorm = 149.4709, GNorm = 2.7249, lr_0 = 6.2962e-04
Loss = 4.9152e-02, PNorm = 149.5218, GNorm = 0.4221, lr_0 = 6.2919e-04
Loss = 4.5353e-02, PNorm = 149.5752, GNorm = 0.3506, lr_0 = 6.2876e-04
Loss = 5.1152e-02, PNorm = 149.6338, GNorm = 0.6780, lr_0 = 6.2833e-04
Loss = 4.9864e-02, PNorm = 149.6961, GNorm = 0.8131, lr_0 = 6.2789e-04
Loss = 4.9867e-02, PNorm = 149.7541, GNorm = 0.3619, lr_0 = 6.2746e-04
Loss = 5.0826e-02, PNorm = 149.8170, GNorm = 1.0650, lr_0 = 6.2703e-04
Loss = 5.4830e-02, PNorm = 149.8734, GNorm = 0.8630, lr_0 = 6.2661e-04
Loss = 5.3462e-02, PNorm = 149.9412, GNorm = 0.9199, lr_0 = 6.2618e-04
Loss = 5.2265e-02, PNorm = 150.0029, GNorm = 0.3692, lr_0 = 6.2575e-04
Loss = 5.1413e-02, PNorm = 150.0634, GNorm = 0.6459, lr_0 = 6.2532e-04
Loss = 4.8467e-02, PNorm = 150.1204, GNorm = 0.2875, lr_0 = 6.2489e-04
Loss = 4.9808e-02, PNorm = 150.1795, GNorm = 0.4667, lr_0 = 6.2446e-04
Loss = 4.9696e-02, PNorm = 150.2347, GNorm = 0.4557, lr_0 = 6.2403e-04
Loss = 7.1414e-02, PNorm = 150.2928, GNorm = 0.4809, lr_0 = 6.2361e-04
Loss = 5.0698e-02, PNorm = 150.3565, GNorm = 0.3691, lr_0 = 6.2318e-04
Loss = 6.5258e-02, PNorm = 150.4200, GNorm = 0.4129, lr_0 = 6.2275e-04
Loss = 5.7831e-02, PNorm = 150.4836, GNorm = 0.4595, lr_0 = 6.2233e-04
Loss = 6.3368e-02, PNorm = 150.5503, GNorm = 0.4696, lr_0 = 6.2190e-04
Loss = 4.8372e-02, PNorm = 150.6164, GNorm = 0.5847, lr_0 = 6.2147e-04
Loss = 4.5707e-02, PNorm = 150.6816, GNorm = 0.3835, lr_0 = 6.2105e-04
Loss = 5.2042e-02, PNorm = 150.7426, GNorm = 0.6022, lr_0 = 6.2062e-04
Loss = 6.4123e-02, PNorm = 150.8047, GNorm = 0.6115, lr_0 = 6.2020e-04
Loss = 6.0719e-02, PNorm = 150.8625, GNorm = 0.5805, lr_0 = 6.1977e-04
Loss = 7.1303e-02, PNorm = 150.9297, GNorm = 0.5268, lr_0 = 6.1935e-04
Loss = 6.1691e-02, PNorm = 150.9963, GNorm = 0.3906, lr_0 = 6.1892e-04
Loss = 5.1625e-02, PNorm = 151.0619, GNorm = 0.5117, lr_0 = 6.1850e-04
Loss = 4.4094e-02, PNorm = 151.1245, GNorm = 0.6201, lr_0 = 6.1808e-04
Loss = 5.5842e-02, PNorm = 151.1822, GNorm = 1.0114, lr_0 = 6.1765e-04
Loss = 5.8809e-02, PNorm = 151.2406, GNorm = 0.6193, lr_0 = 6.1723e-04
Loss = 5.4139e-02, PNorm = 151.3076, GNorm = 0.3210, lr_0 = 6.1681e-04
Loss = 5.2508e-02, PNorm = 151.3715, GNorm = 0.5547, lr_0 = 6.1638e-04
Loss = 5.9703e-02, PNorm = 151.4434, GNorm = 0.9225, lr_0 = 6.1596e-04
Loss = 6.1719e-02, PNorm = 151.5078, GNorm = 1.1212, lr_0 = 6.1554e-04
Loss = 5.0725e-02, PNorm = 151.5742, GNorm = 0.6493, lr_0 = 6.1512e-04
Loss = 5.4707e-02, PNorm = 151.6357, GNorm = 0.6577, lr_0 = 6.1470e-04
Loss = 4.7677e-02, PNorm = 151.6987, GNorm = 0.3539, lr_0 = 6.1428e-04
Loss = 5.3583e-02, PNorm = 151.7588, GNorm = 0.4620, lr_0 = 6.1385e-04
Loss = 4.5394e-02, PNorm = 151.8157, GNorm = 0.3268, lr_0 = 6.1343e-04
Loss = 6.3774e-02, PNorm = 151.8789, GNorm = 0.3331, lr_0 = 6.1301e-04
Loss = 5.1283e-02, PNorm = 151.9409, GNorm = 0.6926, lr_0 = 6.1259e-04
Loss = 5.0517e-02, PNorm = 152.0062, GNorm = 0.2806, lr_0 = 6.1217e-04
Loss = 7.3146e-02, PNorm = 152.0788, GNorm = 0.9838, lr_0 = 6.1175e-04
Loss = 6.1445e-02, PNorm = 152.1436, GNorm = 0.5060, lr_0 = 6.1134e-04
Loss = 4.2068e-02, PNorm = 152.2037, GNorm = 0.3422, lr_0 = 6.1092e-04
Loss = 4.7723e-02, PNorm = 152.2679, GNorm = 0.3634, lr_0 = 6.1050e-04
Validation mae = 0.124190
Epoch 8
Loss = 4.5284e-02, PNorm = 152.3185, GNorm = 0.4035, lr_0 = 6.1008e-04
Loss = 4.5864e-02, PNorm = 152.3674, GNorm = 0.3520, lr_0 = 6.0966e-04
Loss = 3.8987e-02, PNorm = 152.4058, GNorm = 0.3700, lr_0 = 6.0924e-04
Loss = 3.7850e-02, PNorm = 152.4388, GNorm = 0.3260, lr_0 = 6.0883e-04
Loss = 4.3232e-02, PNorm = 152.4730, GNorm = 0.3699, lr_0 = 6.0841e-04
Loss = 4.6892e-02, PNorm = 152.5176, GNorm = 0.8950, lr_0 = 6.0799e-04
Loss = 4.2077e-02, PNorm = 152.5630, GNorm = 0.5285, lr_0 = 6.0758e-04
Loss = 3.9858e-02, PNorm = 152.6047, GNorm = 0.5513, lr_0 = 6.0716e-04
Loss = 6.5512e-02, PNorm = 152.6600, GNorm = 0.8933, lr_0 = 6.0674e-04
Loss = 4.4237e-02, PNorm = 152.6940, GNorm = 0.6426, lr_0 = 6.0633e-04
Loss = 3.8119e-02, PNorm = 152.7305, GNorm = 0.4624, lr_0 = 6.0591e-04
Loss = 3.7136e-02, PNorm = 152.7690, GNorm = 0.3815, lr_0 = 6.0550e-04
Loss = 4.0579e-02, PNorm = 152.8027, GNorm = 0.2399, lr_0 = 6.0508e-04
Loss = 3.5245e-02, PNorm = 152.8370, GNorm = 0.4942, lr_0 = 6.0467e-04
Loss = 3.6943e-02, PNorm = 152.8722, GNorm = 0.4081, lr_0 = 6.0425e-04
Loss = 3.6109e-02, PNorm = 152.9068, GNorm = 0.3399, lr_0 = 6.0384e-04
Loss = 3.0383e-02, PNorm = 152.9425, GNorm = 0.4460, lr_0 = 6.0343e-04
Loss = 4.0274e-02, PNorm = 152.9782, GNorm = 0.4466, lr_0 = 6.0301e-04
Loss = 3.8733e-02, PNorm = 153.0192, GNorm = 0.6028, lr_0 = 6.0260e-04
Loss = 3.8537e-02, PNorm = 153.0561, GNorm = 0.6065, lr_0 = 6.0219e-04
Loss = 3.6086e-02, PNorm = 153.0928, GNorm = 0.4080, lr_0 = 6.0178e-04
Loss = 3.9030e-02, PNorm = 153.1299, GNorm = 0.3891, lr_0 = 6.0136e-04
Loss = 4.8738e-02, PNorm = 153.1694, GNorm = 0.4073, lr_0 = 6.0095e-04
Loss = 3.9136e-02, PNorm = 153.2075, GNorm = 0.4025, lr_0 = 6.0054e-04
Loss = 3.7496e-02, PNorm = 153.2479, GNorm = 0.4935, lr_0 = 6.0013e-04
Loss = 3.7512e-02, PNorm = 153.2874, GNorm = 0.2927, lr_0 = 5.9972e-04
Loss = 3.6841e-02, PNorm = 153.3277, GNorm = 0.3320, lr_0 = 5.9931e-04
Loss = 3.9797e-02, PNorm = 153.3675, GNorm = 0.2953, lr_0 = 5.9890e-04
Loss = 3.9967e-02, PNorm = 153.4040, GNorm = 0.7960, lr_0 = 5.9849e-04
Loss = 4.2582e-02, PNorm = 153.4445, GNorm = 0.4543, lr_0 = 5.9808e-04
Loss = 4.9319e-02, PNorm = 153.4782, GNorm = 0.4967, lr_0 = 5.9767e-04
Loss = 5.0007e-02, PNorm = 153.5200, GNorm = 0.4054, lr_0 = 5.9726e-04
Loss = 4.9955e-02, PNorm = 153.5610, GNorm = 0.2825, lr_0 = 5.9685e-04
Loss = 3.5232e-02, PNorm = 153.6005, GNorm = 0.5250, lr_0 = 5.9644e-04
Loss = 4.4423e-02, PNorm = 153.6449, GNorm = 0.6550, lr_0 = 5.9603e-04
Loss = 5.0780e-02, PNorm = 153.6917, GNorm = 0.3674, lr_0 = 5.9562e-04
Loss = 5.0164e-02, PNorm = 153.7388, GNorm = 0.6785, lr_0 = 5.9521e-04
Loss = 3.8477e-02, PNorm = 153.7835, GNorm = 0.8839, lr_0 = 5.9481e-04
Loss = 3.1657e-02, PNorm = 153.8234, GNorm = 0.5825, lr_0 = 5.9440e-04
Loss = 3.7970e-02, PNorm = 153.8725, GNorm = 0.6858, lr_0 = 5.9399e-04
Loss = 4.0417e-02, PNorm = 153.9165, GNorm = 0.4359, lr_0 = 5.9358e-04
Loss = 3.3905e-02, PNorm = 153.9624, GNorm = 0.6647, lr_0 = 5.9318e-04
Loss = 4.0939e-02, PNorm = 154.0053, GNorm = 0.3515, lr_0 = 5.9277e-04
Loss = 4.6926e-02, PNorm = 154.0511, GNorm = 0.6629, lr_0 = 5.9236e-04
Loss = 3.6396e-02, PNorm = 154.0973, GNorm = 0.4164, lr_0 = 5.9196e-04
Loss = 3.7757e-02, PNorm = 154.1463, GNorm = 0.3246, lr_0 = 5.9155e-04
Loss = 3.9134e-02, PNorm = 154.1905, GNorm = 0.6189, lr_0 = 5.9115e-04
Loss = 3.5055e-02, PNorm = 154.2308, GNorm = 0.3588, lr_0 = 5.9074e-04
Loss = 4.7746e-02, PNorm = 154.2766, GNorm = 0.3839, lr_0 = 5.9034e-04
Loss = 4.4720e-02, PNorm = 154.3209, GNorm = 0.4512, lr_0 = 5.8993e-04
Loss = 5.0740e-02, PNorm = 154.3705, GNorm = 0.4345, lr_0 = 5.8953e-04
Loss = 3.9047e-02, PNorm = 154.4270, GNorm = 0.2836, lr_0 = 5.8913e-04
Loss = 4.7998e-02, PNorm = 154.4735, GNorm = 0.4167, lr_0 = 5.8872e-04
Loss = 4.1854e-02, PNorm = 154.5162, GNorm = 0.6413, lr_0 = 5.8832e-04
Loss = 4.0141e-02, PNorm = 154.5591, GNorm = 0.7319, lr_0 = 5.8792e-04
Loss = 3.3165e-02, PNorm = 154.5959, GNorm = 0.3150, lr_0 = 5.8751e-04
Loss = 3.7806e-02, PNorm = 154.6383, GNorm = 0.6444, lr_0 = 5.8711e-04
Loss = 3.7197e-02, PNorm = 154.6842, GNorm = 0.3469, lr_0 = 5.8671e-04
Loss = 3.8611e-02, PNorm = 154.7313, GNorm = 0.3221, lr_0 = 5.8631e-04
Loss = 3.9340e-02, PNorm = 154.7755, GNorm = 0.4425, lr_0 = 5.8591e-04
Loss = 3.3909e-02, PNorm = 154.8225, GNorm = 0.7862, lr_0 = 5.8550e-04
Loss = 3.6175e-02, PNorm = 154.8714, GNorm = 0.3183, lr_0 = 5.8510e-04
Loss = 3.9050e-02, PNorm = 154.9181, GNorm = 0.3808, lr_0 = 5.8470e-04
Loss = 3.3805e-02, PNorm = 154.9697, GNorm = 0.6397, lr_0 = 5.8430e-04
Loss = 3.8258e-02, PNorm = 155.0114, GNorm = 0.6976, lr_0 = 5.8390e-04
Loss = 3.9211e-02, PNorm = 155.0538, GNorm = 0.5956, lr_0 = 5.8350e-04
Loss = 3.6709e-02, PNorm = 155.0982, GNorm = 0.3325, lr_0 = 5.8310e-04
Loss = 4.4791e-02, PNorm = 155.1381, GNorm = 0.4965, lr_0 = 5.8270e-04
Loss = 3.8213e-02, PNorm = 155.1833, GNorm = 0.3477, lr_0 = 5.8230e-04
Loss = 4.5336e-02, PNorm = 155.2261, GNorm = 0.4578, lr_0 = 5.8190e-04
Loss = 3.5799e-02, PNorm = 155.2741, GNorm = 0.4318, lr_0 = 5.8151e-04
Loss = 4.0860e-02, PNorm = 155.3198, GNorm = 0.4902, lr_0 = 5.8111e-04
Loss = 3.4192e-02, PNorm = 155.3683, GNorm = 0.4203, lr_0 = 5.8071e-04
Loss = 3.9442e-02, PNorm = 155.4134, GNorm = 0.3952, lr_0 = 5.8031e-04
Loss = 3.9202e-02, PNorm = 155.4583, GNorm = 0.2804, lr_0 = 5.7991e-04
Loss = 4.3630e-02, PNorm = 155.5044, GNorm = 0.7008, lr_0 = 5.7952e-04
Loss = 4.2081e-02, PNorm = 155.5547, GNorm = 0.3848, lr_0 = 5.7912e-04
Loss = 3.4920e-02, PNorm = 155.6045, GNorm = 0.2684, lr_0 = 5.7872e-04
Loss = 3.8973e-02, PNorm = 155.6523, GNorm = 0.4252, lr_0 = 5.7833e-04
Loss = 3.9042e-02, PNorm = 155.7035, GNorm = 0.4574, lr_0 = 5.7793e-04
Loss = 3.7284e-02, PNorm = 155.7572, GNorm = 0.8954, lr_0 = 5.7753e-04
Loss = 5.0649e-02, PNorm = 155.8001, GNorm = 0.3249, lr_0 = 5.7714e-04
Loss = 3.6538e-02, PNorm = 155.8418, GNorm = 0.5943, lr_0 = 5.7674e-04
Loss = 3.6559e-02, PNorm = 155.8918, GNorm = 0.6319, lr_0 = 5.7635e-04
Loss = 3.5931e-02, PNorm = 155.9375, GNorm = 0.4525, lr_0 = 5.7595e-04
Loss = 3.6265e-02, PNorm = 155.9877, GNorm = 0.3276, lr_0 = 5.7556e-04
Loss = 3.3718e-02, PNorm = 156.0329, GNorm = 0.5132, lr_0 = 5.7516e-04
Loss = 4.1648e-02, PNorm = 156.0778, GNorm = 0.6836, lr_0 = 5.7477e-04
Loss = 3.7684e-02, PNorm = 156.1280, GNorm = 0.7785, lr_0 = 5.7438e-04
Loss = 4.1959e-02, PNorm = 156.1739, GNorm = 0.2748, lr_0 = 5.7398e-04
Loss = 3.8938e-02, PNorm = 156.2238, GNorm = 0.3866, lr_0 = 5.7359e-04
Loss = 3.8302e-02, PNorm = 156.2690, GNorm = 0.3089, lr_0 = 5.7320e-04
Loss = 4.4403e-02, PNorm = 156.3187, GNorm = 0.7746, lr_0 = 5.7280e-04
Loss = 3.4672e-02, PNorm = 156.3689, GNorm = 0.3854, lr_0 = 5.7241e-04
Loss = 4.0094e-02, PNorm = 156.4150, GNorm = 1.2666, lr_0 = 5.7202e-04
Loss = 4.3352e-02, PNorm = 156.4668, GNorm = 0.6886, lr_0 = 5.7163e-04
Loss = 4.9603e-02, PNorm = 156.5180, GNorm = 0.3049, lr_0 = 5.7124e-04
Loss = 4.6166e-02, PNorm = 156.5738, GNorm = 0.6392, lr_0 = 5.7084e-04
Loss = 5.9944e-02, PNorm = 156.6312, GNorm = 0.9331, lr_0 = 5.7045e-04
Loss = 4.7570e-02, PNorm = 156.6843, GNorm = 1.0925, lr_0 = 5.7006e-04
Loss = 4.7859e-02, PNorm = 156.7437, GNorm = 0.7009, lr_0 = 5.6967e-04
Loss = 3.9625e-02, PNorm = 156.7998, GNorm = 0.4685, lr_0 = 5.6928e-04
Loss = 5.3645e-02, PNorm = 156.8613, GNorm = 0.6387, lr_0 = 5.6889e-04
Loss = 5.7263e-02, PNorm = 156.9188, GNorm = 0.5959, lr_0 = 5.6850e-04
Loss = 4.5204e-02, PNorm = 156.9738, GNorm = 0.6114, lr_0 = 5.6811e-04
Loss = 4.2685e-02, PNorm = 157.0265, GNorm = 0.5265, lr_0 = 5.6772e-04
Loss = 4.0530e-02, PNorm = 157.0783, GNorm = 0.5657, lr_0 = 5.6733e-04
Loss = 3.9624e-02, PNorm = 157.1353, GNorm = 0.7514, lr_0 = 5.6695e-04
Loss = 4.8420e-02, PNorm = 157.1912, GNorm = 0.4064, lr_0 = 5.6656e-04
Loss = 3.9364e-02, PNorm = 157.2458, GNorm = 0.3460, lr_0 = 5.6617e-04
Loss = 4.4612e-02, PNorm = 157.2958, GNorm = 0.4245, lr_0 = 5.6578e-04
Loss = 4.4341e-02, PNorm = 157.3437, GNorm = 0.4967, lr_0 = 5.6539e-04
Loss = 4.5758e-02, PNorm = 157.3970, GNorm = 0.8017, lr_0 = 5.6501e-04
Loss = 5.1675e-02, PNorm = 157.4401, GNorm = 0.4671, lr_0 = 5.6462e-04
Loss = 3.8886e-02, PNorm = 157.4905, GNorm = 0.9952, lr_0 = 5.6423e-04
Loss = 4.6395e-02, PNorm = 157.5415, GNorm = 0.6440, lr_0 = 5.6385e-04
Loss = 4.6345e-02, PNorm = 157.5926, GNorm = 0.4574, lr_0 = 5.6346e-04
Loss = 4.3018e-02, PNorm = 157.6465, GNorm = 0.3127, lr_0 = 5.6307e-04
Loss = 4.4255e-02, PNorm = 157.6930, GNorm = 0.3430, lr_0 = 5.6269e-04
Loss = 3.3696e-02, PNorm = 157.7415, GNorm = 0.2574, lr_0 = 5.6230e-04
Validation mae = 0.122441
Epoch 9
Loss = 3.8492e-02, PNorm = 157.7772, GNorm = 0.3937, lr_0 = 5.6192e-04
Loss = 3.3474e-02, PNorm = 157.8188, GNorm = 0.2709, lr_0 = 5.6153e-04
Loss = 3.4880e-02, PNorm = 157.8515, GNorm = 0.4009, lr_0 = 5.6115e-04
Loss = 2.9629e-02, PNorm = 157.8780, GNorm = 0.3751, lr_0 = 5.6076e-04
Loss = 3.2849e-02, PNorm = 157.9092, GNorm = 0.3059, lr_0 = 5.6038e-04
Loss = 3.0634e-02, PNorm = 157.9428, GNorm = 0.5917, lr_0 = 5.6000e-04
Loss = 3.9654e-02, PNorm = 157.9767, GNorm = 0.3207, lr_0 = 5.5961e-04
Loss = 3.6079e-02, PNorm = 158.0110, GNorm = 0.5819, lr_0 = 5.5923e-04
Loss = 3.4383e-02, PNorm = 158.0477, GNorm = 0.6325, lr_0 = 5.5885e-04
Loss = 3.8012e-02, PNorm = 158.0819, GNorm = 0.6562, lr_0 = 5.5846e-04
Loss = 4.2145e-02, PNorm = 158.1181, GNorm = 0.3273, lr_0 = 5.5808e-04
Loss = 3.6789e-02, PNorm = 158.1554, GNorm = 0.3790, lr_0 = 5.5770e-04
Loss = 4.2179e-02, PNorm = 158.1887, GNorm = 0.2364, lr_0 = 5.5732e-04
Loss = 3.9196e-02, PNorm = 158.2166, GNorm = 0.3728, lr_0 = 5.5693e-04
Loss = 4.3638e-02, PNorm = 158.2617, GNorm = 0.4696, lr_0 = 5.5655e-04
Loss = 4.4378e-02, PNorm = 158.2952, GNorm = 0.3024, lr_0 = 5.5617e-04
Loss = 3.3433e-02, PNorm = 158.3318, GNorm = 0.8282, lr_0 = 5.5579e-04
Loss = 3.7534e-02, PNorm = 158.3676, GNorm = 0.5996, lr_0 = 5.5541e-04
Loss = 2.9947e-02, PNorm = 158.4009, GNorm = 0.4361, lr_0 = 5.5503e-04
Loss = 4.8314e-02, PNorm = 158.4307, GNorm = 0.4175, lr_0 = 5.5465e-04
Loss = 2.6407e-02, PNorm = 158.4687, GNorm = 0.3661, lr_0 = 5.5427e-04
Loss = 3.4669e-02, PNorm = 158.5042, GNorm = 0.3845, lr_0 = 5.5389e-04
Loss = 2.9420e-02, PNorm = 158.5422, GNorm = 0.5628, lr_0 = 5.5351e-04
Loss = 4.0182e-02, PNorm = 158.5773, GNorm = 0.5141, lr_0 = 5.5313e-04
Loss = 3.5069e-02, PNorm = 158.6149, GNorm = 0.3314, lr_0 = 5.5275e-04
Loss = 3.0401e-02, PNorm = 158.6525, GNorm = 0.5145, lr_0 = 5.5237e-04
Loss = 3.5466e-02, PNorm = 158.6888, GNorm = 0.3939, lr_0 = 5.5199e-04
Loss = 3.7049e-02, PNorm = 158.7270, GNorm = 0.2994, lr_0 = 5.5162e-04
Loss = 3.1640e-02, PNorm = 158.7643, GNorm = 0.3644, lr_0 = 5.5124e-04
Loss = 3.2181e-02, PNorm = 158.7995, GNorm = 0.4774, lr_0 = 5.5086e-04
Loss = 3.3902e-02, PNorm = 158.8349, GNorm = 0.2293, lr_0 = 5.5048e-04
Loss = 2.8346e-02, PNorm = 158.8700, GNorm = 0.3841, lr_0 = 5.5011e-04
Loss = 2.9701e-02, PNorm = 158.9005, GNorm = 0.2796, lr_0 = 5.4973e-04
Loss = 2.9148e-02, PNorm = 158.9286, GNorm = 0.4168, lr_0 = 5.4935e-04
Loss = 2.6443e-02, PNorm = 158.9630, GNorm = 0.4218, lr_0 = 5.4898e-04
Loss = 3.0290e-02, PNorm = 158.9989, GNorm = 0.6840, lr_0 = 5.4860e-04
Loss = 3.4520e-02, PNorm = 159.0307, GNorm = 0.5053, lr_0 = 5.4822e-04
Loss = 3.2092e-02, PNorm = 159.0675, GNorm = 0.3012, lr_0 = 5.4785e-04
Loss = 3.9128e-02, PNorm = 159.1037, GNorm = 0.3358, lr_0 = 5.4747e-04
Loss = 3.5454e-02, PNorm = 159.1398, GNorm = 0.3570, lr_0 = 5.4710e-04
Loss = 3.2380e-02, PNorm = 159.1726, GNorm = 0.3951, lr_0 = 5.4672e-04
Loss = 3.0961e-02, PNorm = 159.2082, GNorm = 0.2574, lr_0 = 5.4635e-04
Loss = 3.5390e-02, PNorm = 159.2421, GNorm = 0.4032, lr_0 = 5.4597e-04
Loss = 3.2485e-02, PNorm = 159.2800, GNorm = 0.5398, lr_0 = 5.4560e-04
Loss = 3.5123e-02, PNorm = 159.3172, GNorm = 0.2411, lr_0 = 5.4523e-04
Loss = 3.2094e-02, PNorm = 159.3581, GNorm = 0.2859, lr_0 = 5.4485e-04
Loss = 2.8357e-02, PNorm = 159.3938, GNorm = 0.6692, lr_0 = 5.4448e-04
Loss = 4.3208e-02, PNorm = 159.4308, GNorm = 0.3831, lr_0 = 5.4411e-04
Loss = 3.6915e-02, PNorm = 159.4734, GNorm = 0.4114, lr_0 = 5.4373e-04
Loss = 3.3324e-02, PNorm = 159.5126, GNorm = 0.5857, lr_0 = 5.4336e-04
Loss = 3.2286e-02, PNorm = 159.5514, GNorm = 0.3419, lr_0 = 5.4299e-04
Loss = 3.1164e-02, PNorm = 159.5912, GNorm = 0.3143, lr_0 = 5.4262e-04
Loss = 2.6088e-02, PNorm = 159.6296, GNorm = 0.4482, lr_0 = 5.4225e-04
Loss = 3.4175e-02, PNorm = 159.6634, GNorm = 0.7003, lr_0 = 5.4187e-04
Loss = 3.0462e-02, PNorm = 159.6983, GNorm = 0.3109, lr_0 = 5.4150e-04
Loss = 3.4110e-02, PNorm = 159.7338, GNorm = 0.3798, lr_0 = 5.4113e-04
Loss = 2.8259e-02, PNorm = 159.7732, GNorm = 0.5192, lr_0 = 5.4076e-04
Loss = 2.4301e-02, PNorm = 159.8094, GNorm = 0.1816, lr_0 = 5.4039e-04
Loss = 4.8533e-02, PNorm = 159.8449, GNorm = 0.5094, lr_0 = 5.4002e-04
Loss = 3.5029e-02, PNorm = 159.8775, GNorm = 0.4293, lr_0 = 5.3965e-04
Loss = 3.2443e-02, PNorm = 159.9139, GNorm = 0.4902, lr_0 = 5.3928e-04
Loss = 2.7625e-02, PNorm = 159.9522, GNorm = 0.3187, lr_0 = 5.3891e-04
Loss = 4.1740e-02, PNorm = 159.9887, GNorm = 0.4113, lr_0 = 5.3854e-04
Loss = 3.0603e-02, PNorm = 160.0238, GNorm = 0.3682, lr_0 = 5.3817e-04
Loss = 3.2190e-02, PNorm = 160.0607, GNorm = 1.2140, lr_0 = 5.3781e-04
Loss = 2.8363e-02, PNorm = 160.0983, GNorm = 0.4017, lr_0 = 5.3744e-04
Loss = 3.4833e-02, PNorm = 160.1310, GNorm = 0.3847, lr_0 = 5.3707e-04
Loss = 2.9840e-02, PNorm = 160.1638, GNorm = 0.6557, lr_0 = 5.3670e-04
Loss = 3.0143e-02, PNorm = 160.1956, GNorm = 0.5387, lr_0 = 5.3633e-04
Loss = 3.0369e-02, PNorm = 160.2307, GNorm = 0.1885, lr_0 = 5.3597e-04
Loss = 2.8200e-02, PNorm = 160.2705, GNorm = 0.4333, lr_0 = 5.3560e-04
Loss = 3.2580e-02, PNorm = 160.3076, GNorm = 0.3559, lr_0 = 5.3523e-04
Loss = 3.8587e-02, PNorm = 160.3513, GNorm = 0.3850, lr_0 = 5.3486e-04
Loss = 3.3229e-02, PNorm = 160.3911, GNorm = 0.2604, lr_0 = 5.3450e-04
Loss = 3.6036e-02, PNorm = 160.4262, GNorm = 0.3610, lr_0 = 5.3413e-04
Loss = 3.1787e-02, PNorm = 160.4628, GNorm = 0.6919, lr_0 = 5.3377e-04
Loss = 3.1630e-02, PNorm = 160.5033, GNorm = 0.3848, lr_0 = 5.3340e-04
Loss = 2.6002e-02, PNorm = 160.5408, GNorm = 0.2049, lr_0 = 5.3304e-04
Loss = 3.1509e-02, PNorm = 160.5813, GNorm = 0.2403, lr_0 = 5.3267e-04
Loss = 3.5352e-02, PNorm = 160.6214, GNorm = 0.3710, lr_0 = 5.3231e-04
Loss = 2.8063e-02, PNorm = 160.6592, GNorm = 0.3061, lr_0 = 5.3194e-04
Loss = 3.1300e-02, PNorm = 160.6980, GNorm = 0.2727, lr_0 = 5.3158e-04
Loss = 2.9732e-02, PNorm = 160.7386, GNorm = 0.2793, lr_0 = 5.3121e-04
Loss = 3.4433e-02, PNorm = 160.7738, GNorm = 1.0474, lr_0 = 5.3085e-04
Loss = 2.6499e-02, PNorm = 160.8092, GNorm = 0.4044, lr_0 = 5.3048e-04
Loss = 5.2425e-02, PNorm = 160.8508, GNorm = 0.3634, lr_0 = 5.3012e-04
Loss = 3.3466e-02, PNorm = 160.8946, GNorm = 0.5542, lr_0 = 5.2976e-04
Loss = 3.8575e-02, PNorm = 160.9414, GNorm = 0.7973, lr_0 = 5.2939e-04
Loss = 4.1617e-02, PNorm = 160.9814, GNorm = 0.3916, lr_0 = 5.2903e-04
Loss = 2.9133e-02, PNorm = 161.0205, GNorm = 0.4912, lr_0 = 5.2867e-04
Loss = 4.4036e-02, PNorm = 161.0618, GNorm = 0.8236, lr_0 = 5.2831e-04
Loss = 3.1832e-02, PNorm = 161.1016, GNorm = 0.6582, lr_0 = 5.2795e-04
Loss = 3.7207e-02, PNorm = 161.1450, GNorm = 0.9593, lr_0 = 5.2758e-04
Loss = 3.3136e-02, PNorm = 161.1849, GNorm = 0.2912, lr_0 = 5.2722e-04
Loss = 3.2669e-02, PNorm = 161.2256, GNorm = 0.4615, lr_0 = 5.2686e-04
Loss = 3.2524e-02, PNorm = 161.2690, GNorm = 0.4651, lr_0 = 5.2650e-04
Loss = 3.1822e-02, PNorm = 161.3160, GNorm = 0.3592, lr_0 = 5.2614e-04
Loss = 3.1682e-02, PNorm = 161.3617, GNorm = 0.2703, lr_0 = 5.2578e-04
Loss = 3.1093e-02, PNorm = 161.3999, GNorm = 0.2341, lr_0 = 5.2542e-04
Loss = 2.7918e-02, PNorm = 161.4437, GNorm = 0.2594, lr_0 = 5.2506e-04
Loss = 3.1752e-02, PNorm = 161.4861, GNorm = 0.2485, lr_0 = 5.2470e-04
Loss = 3.3890e-02, PNorm = 161.5293, GNorm = 0.6980, lr_0 = 5.2434e-04
Loss = 4.4928e-02, PNorm = 161.5713, GNorm = 0.3366, lr_0 = 5.2398e-04
Loss = 3.2432e-02, PNorm = 161.6134, GNorm = 0.3998, lr_0 = 5.2362e-04
Loss = 2.9447e-02, PNorm = 161.6530, GNorm = 0.3510, lr_0 = 5.2326e-04
Loss = 3.7202e-02, PNorm = 161.6899, GNorm = 0.2289, lr_0 = 5.2290e-04
Loss = 3.5498e-02, PNorm = 161.7292, GNorm = 0.7632, lr_0 = 5.2255e-04
Loss = 2.8742e-02, PNorm = 161.7649, GNorm = 0.3154, lr_0 = 5.2219e-04
Loss = 3.4581e-02, PNorm = 161.8043, GNorm = 0.2141, lr_0 = 5.2183e-04
Loss = 2.8160e-02, PNorm = 161.8435, GNorm = 0.2226, lr_0 = 5.2147e-04
Loss = 3.1355e-02, PNorm = 161.8848, GNorm = 0.4402, lr_0 = 5.2112e-04
Loss = 3.3598e-02, PNorm = 161.9253, GNorm = 0.4648, lr_0 = 5.2076e-04
Loss = 4.0070e-02, PNorm = 161.9706, GNorm = 0.4418, lr_0 = 5.2040e-04
Loss = 3.2445e-02, PNorm = 162.0116, GNorm = 0.7179, lr_0 = 5.2005e-04
Loss = 3.2044e-02, PNorm = 162.0516, GNorm = 0.6272, lr_0 = 5.1969e-04
Loss = 2.9512e-02, PNorm = 162.0940, GNorm = 0.3553, lr_0 = 5.1933e-04
Loss = 3.5915e-02, PNorm = 162.1314, GNorm = 0.2383, lr_0 = 5.1898e-04
Loss = 3.2843e-02, PNorm = 162.1744, GNorm = 0.2123, lr_0 = 5.1862e-04
Loss = 3.0829e-02, PNorm = 162.2194, GNorm = 0.3564, lr_0 = 5.1827e-04
Loss = 3.1011e-02, PNorm = 162.2649, GNorm = 0.2494, lr_0 = 5.1791e-04
Validation mae = 0.122452
Epoch 10
Loss = 3.8594e-02, PNorm = 162.3017, GNorm = 0.3510, lr_0 = 5.1756e-04
Loss = 2.5331e-02, PNorm = 162.3389, GNorm = 0.3961, lr_0 = 5.1720e-04
Loss = 3.9690e-02, PNorm = 162.3775, GNorm = 0.4834, lr_0 = 5.1685e-04
Loss = 3.9587e-02, PNorm = 162.4169, GNorm = 0.3306, lr_0 = 5.1649e-04
Loss = 2.6690e-02, PNorm = 162.4537, GNorm = 0.3684, lr_0 = 5.1614e-04
Loss = 3.0037e-02, PNorm = 162.4831, GNorm = 0.8190, lr_0 = 5.1579e-04
Loss = 2.7999e-02, PNorm = 162.5144, GNorm = 0.4528, lr_0 = 5.1543e-04
Loss = 2.5670e-02, PNorm = 162.5414, GNorm = 0.5082, lr_0 = 5.1508e-04
Loss = 2.8820e-02, PNorm = 162.5674, GNorm = 0.4977, lr_0 = 5.1473e-04
Loss = 3.2564e-02, PNorm = 162.5926, GNorm = 0.2879, lr_0 = 5.1437e-04
Loss = 3.1186e-02, PNorm = 162.6173, GNorm = 0.2861, lr_0 = 5.1402e-04
Loss = 2.8037e-02, PNorm = 162.6453, GNorm = 0.3795, lr_0 = 5.1367e-04
Loss = 3.2349e-02, PNorm = 162.6777, GNorm = 0.6507, lr_0 = 5.1332e-04
Loss = 2.1605e-02, PNorm = 162.7066, GNorm = 0.2151, lr_0 = 5.1297e-04
Loss = 2.6203e-02, PNorm = 162.7365, GNorm = 0.3557, lr_0 = 5.1262e-04
Loss = 3.2242e-02, PNorm = 162.7655, GNorm = 0.1797, lr_0 = 5.1226e-04
Loss = 2.8812e-02, PNorm = 162.7855, GNorm = 0.5079, lr_0 = 5.1191e-04
Loss = 2.7815e-02, PNorm = 162.8093, GNorm = 0.6168, lr_0 = 5.1156e-04
Loss = 2.4658e-02, PNorm = 162.8392, GNorm = 0.3264, lr_0 = 5.1121e-04
Loss = 3.4589e-02, PNorm = 162.8703, GNorm = 0.3324, lr_0 = 5.1086e-04
Loss = 2.9399e-02, PNorm = 162.9042, GNorm = 0.2745, lr_0 = 5.1051e-04
Loss = 2.4105e-02, PNorm = 162.9361, GNorm = 0.2763, lr_0 = 5.1016e-04
Loss = 2.3963e-02, PNorm = 162.9645, GNorm = 0.2700, lr_0 = 5.0981e-04
Loss = 2.3775e-02, PNorm = 162.9920, GNorm = 0.3344, lr_0 = 5.0946e-04
Loss = 2.7300e-02, PNorm = 163.0192, GNorm = 0.3139, lr_0 = 5.0911e-04
Loss = 2.3133e-02, PNorm = 163.0521, GNorm = 0.8044, lr_0 = 5.0877e-04
Loss = 2.4517e-02, PNorm = 163.0812, GNorm = 0.2723, lr_0 = 5.0842e-04
Loss = 2.0923e-02, PNorm = 163.1084, GNorm = 0.2279, lr_0 = 5.0807e-04
Loss = 2.4790e-02, PNorm = 163.1369, GNorm = 0.3548, lr_0 = 5.0772e-04
Loss = 2.4139e-02, PNorm = 163.1640, GNorm = 0.3097, lr_0 = 5.0737e-04
Loss = 1.6413e-02, PNorm = 163.1901, GNorm = 0.3907, lr_0 = 5.0703e-04
Loss = 2.7746e-02, PNorm = 163.2203, GNorm = 0.2883, lr_0 = 5.0668e-04
Loss = 2.2542e-02, PNorm = 163.2464, GNorm = 0.4921, lr_0 = 5.0633e-04
Loss = 3.0776e-02, PNorm = 163.2747, GNorm = 0.2623, lr_0 = 5.0598e-04
Loss = 3.4841e-02, PNorm = 163.3034, GNorm = 1.1480, lr_0 = 5.0564e-04
Loss = 3.1277e-02, PNorm = 163.3302, GNorm = 0.9217, lr_0 = 5.0529e-04
Loss = 2.5261e-02, PNorm = 163.3699, GNorm = 0.3262, lr_0 = 5.0494e-04
Loss = 2.7743e-02, PNorm = 163.4040, GNorm = 0.5887, lr_0 = 5.0460e-04
Loss = 3.7725e-02, PNorm = 163.4430, GNorm = 0.3463, lr_0 = 5.0425e-04
Loss = 2.7250e-02, PNorm = 163.4770, GNorm = 0.4512, lr_0 = 5.0391e-04
Loss = 2.2630e-02, PNorm = 163.5133, GNorm = 0.3416, lr_0 = 5.0356e-04
Loss = 2.3420e-02, PNorm = 163.5441, GNorm = 0.2354, lr_0 = 5.0322e-04
Loss = 2.2929e-02, PNorm = 163.5672, GNorm = 0.3409, lr_0 = 5.0287e-04
Loss = 2.1342e-02, PNorm = 163.5938, GNorm = 0.2437, lr_0 = 5.0253e-04
Loss = 2.8873e-02, PNorm = 163.6245, GNorm = 0.4132, lr_0 = 5.0218e-04
Loss = 2.5555e-02, PNorm = 163.6558, GNorm = 0.2603, lr_0 = 5.0184e-04
Loss = 2.3623e-02, PNorm = 163.6921, GNorm = 0.2923, lr_0 = 5.0150e-04
Loss = 2.3524e-02, PNorm = 163.7230, GNorm = 1.0474, lr_0 = 5.0115e-04
Loss = 2.8155e-02, PNorm = 163.7495, GNorm = 0.2678, lr_0 = 5.0081e-04
Loss = 2.4281e-02, PNorm = 163.7791, GNorm = 0.3221, lr_0 = 5.0047e-04
Loss = 2.0702e-02, PNorm = 163.8048, GNorm = 0.2512, lr_0 = 5.0012e-04
Loss = 2.1201e-02, PNorm = 163.8326, GNorm = 0.4035, lr_0 = 4.9978e-04
Loss = 3.2449e-02, PNorm = 163.8610, GNorm = 0.3643, lr_0 = 4.9944e-04
Loss = 2.3624e-02, PNorm = 163.8937, GNorm = 0.8418, lr_0 = 4.9910e-04
Loss = 2.8265e-02, PNorm = 163.9244, GNorm = 0.2251, lr_0 = 4.9875e-04
Loss = 2.5166e-02, PNorm = 163.9533, GNorm = 0.2513, lr_0 = 4.9841e-04
Loss = 3.1054e-02, PNorm = 163.9853, GNorm = 0.4247, lr_0 = 4.9807e-04
Loss = 3.0798e-02, PNorm = 164.0195, GNorm = 0.3387, lr_0 = 4.9773e-04
Loss = 2.1707e-02, PNorm = 164.0512, GNorm = 0.1940, lr_0 = 4.9739e-04
Loss = 2.7197e-02, PNorm = 164.0800, GNorm = 0.3208, lr_0 = 4.9705e-04
Loss = 2.5376e-02, PNorm = 164.1083, GNorm = 0.7221, lr_0 = 4.9671e-04
Loss = 2.1551e-02, PNorm = 164.1387, GNorm = 0.2087, lr_0 = 4.9637e-04
Loss = 2.3472e-02, PNorm = 164.1693, GNorm = 0.4080, lr_0 = 4.9603e-04
Loss = 2.0798e-02, PNorm = 164.2017, GNorm = 0.5443, lr_0 = 4.9569e-04
Loss = 3.5362e-02, PNorm = 164.2378, GNorm = 0.3314, lr_0 = 4.9535e-04
Loss = 2.4283e-02, PNorm = 164.2747, GNorm = 0.3955, lr_0 = 4.9501e-04
Loss = 2.9921e-02, PNorm = 164.3102, GNorm = 0.3056, lr_0 = 4.9467e-04
Loss = 3.4206e-02, PNorm = 164.3415, GNorm = 0.3441, lr_0 = 4.9433e-04
Loss = 2.6085e-02, PNorm = 164.3756, GNorm = 0.4809, lr_0 = 4.9399e-04
Loss = 3.2134e-02, PNorm = 164.4084, GNorm = 0.2707, lr_0 = 4.9365e-04
Loss = 2.7426e-02, PNorm = 164.4431, GNorm = 0.4978, lr_0 = 4.9332e-04
Loss = 2.6517e-02, PNorm = 164.4782, GNorm = 0.5516, lr_0 = 4.9298e-04
Loss = 3.0878e-02, PNorm = 164.5141, GNorm = 0.4950, lr_0 = 4.9264e-04
Loss = 2.9869e-02, PNorm = 164.5483, GNorm = 0.4024, lr_0 = 4.9230e-04
Loss = 2.6046e-02, PNorm = 164.5838, GNorm = 0.2909, lr_0 = 4.9197e-04
Loss = 2.2387e-02, PNorm = 164.6167, GNorm = 0.3267, lr_0 = 4.9163e-04
Loss = 2.5304e-02, PNorm = 164.6516, GNorm = 0.2958, lr_0 = 4.9129e-04
Loss = 2.2931e-02, PNorm = 164.6872, GNorm = 0.2952, lr_0 = 4.9095e-04
Loss = 2.1534e-02, PNorm = 164.7191, GNorm = 0.3480, lr_0 = 4.9062e-04
Loss = 2.4134e-02, PNorm = 164.7485, GNorm = 0.3862, lr_0 = 4.9028e-04
Loss = 2.2629e-02, PNorm = 164.7786, GNorm = 0.5909, lr_0 = 4.8995e-04
Loss = 2.9145e-02, PNorm = 164.8136, GNorm = 0.7384, lr_0 = 4.8961e-04
Loss = 2.5218e-02, PNorm = 164.8460, GNorm = 0.3898, lr_0 = 4.8928e-04
Loss = 2.5646e-02, PNorm = 164.8809, GNorm = 0.3839, lr_0 = 4.8894e-04
Loss = 3.3537e-02, PNorm = 164.9121, GNorm = 0.3091, lr_0 = 4.8861e-04
Loss = 2.6304e-02, PNorm = 164.9474, GNorm = 0.3154, lr_0 = 4.8827e-04
Loss = 2.9346e-02, PNorm = 164.9776, GNorm = 0.3921, lr_0 = 4.8794e-04
Loss = 2.4102e-02, PNorm = 165.0108, GNorm = 0.2587, lr_0 = 4.8760e-04
Loss = 2.7903e-02, PNorm = 165.0451, GNorm = 0.3292, lr_0 = 4.8727e-04
Loss = 3.0983e-02, PNorm = 165.0774, GNorm = 0.4114, lr_0 = 4.8693e-04
Loss = 2.7030e-02, PNorm = 165.1164, GNorm = 0.2701, lr_0 = 4.8660e-04
Loss = 3.4381e-02, PNorm = 165.1523, GNorm = 0.5213, lr_0 = 4.8627e-04
Loss = 2.7700e-02, PNorm = 165.1880, GNorm = 0.9390, lr_0 = 4.8593e-04
Loss = 2.4736e-02, PNorm = 165.2263, GNorm = 0.5720, lr_0 = 4.8560e-04
Loss = 1.7984e-02, PNorm = 165.2608, GNorm = 0.4971, lr_0 = 4.8527e-04
Loss = 2.4839e-02, PNorm = 165.2977, GNorm = 0.5270, lr_0 = 4.8494e-04
Loss = 2.7788e-02, PNorm = 165.3281, GNorm = 0.1965, lr_0 = 4.8460e-04
Loss = 2.9519e-02, PNorm = 165.3600, GNorm = 0.2586, lr_0 = 4.8427e-04
Loss = 3.4443e-02, PNorm = 165.3950, GNorm = 0.7742, lr_0 = 4.8394e-04
Loss = 3.2476e-02, PNorm = 165.4376, GNorm = 0.3825, lr_0 = 4.8361e-04
Loss = 2.7267e-02, PNorm = 165.4746, GNorm = 0.3480, lr_0 = 4.8328e-04
Loss = 2.7500e-02, PNorm = 165.5111, GNorm = 0.5014, lr_0 = 4.8295e-04
Loss = 3.3668e-02, PNorm = 165.5464, GNorm = 0.3127, lr_0 = 4.8262e-04
Loss = 2.5718e-02, PNorm = 165.5786, GNorm = 0.2243, lr_0 = 4.8228e-04
Loss = 2.7499e-02, PNorm = 165.6158, GNorm = 0.3401, lr_0 = 4.8195e-04
Loss = 2.5512e-02, PNorm = 165.6529, GNorm = 0.8675, lr_0 = 4.8162e-04
Loss = 2.5797e-02, PNorm = 165.6878, GNorm = 0.5658, lr_0 = 4.8129e-04
Loss = 3.0302e-02, PNorm = 165.7200, GNorm = 0.7155, lr_0 = 4.8096e-04
Loss = 2.3132e-02, PNorm = 165.7588, GNorm = 0.2151, lr_0 = 4.8064e-04
Loss = 3.3614e-02, PNorm = 165.7922, GNorm = 0.7438, lr_0 = 4.8031e-04
Loss = 3.0747e-02, PNorm = 165.8296, GNorm = 0.6201, lr_0 = 4.7998e-04
Loss = 3.0595e-02, PNorm = 165.8678, GNorm = 0.4084, lr_0 = 4.7965e-04
Loss = 2.5920e-02, PNorm = 165.9047, GNorm = 0.2857, lr_0 = 4.7932e-04
Loss = 2.5960e-02, PNorm = 165.9392, GNorm = 0.2517, lr_0 = 4.7899e-04
Loss = 3.2276e-02, PNorm = 165.9741, GNorm = 0.2959, lr_0 = 4.7866e-04
Loss = 2.7488e-02, PNorm = 166.0134, GNorm = 0.3299, lr_0 = 4.7833e-04
Loss = 2.4992e-02, PNorm = 166.0510, GNorm = 0.6071, lr_0 = 4.7801e-04
Loss = 3.2929e-02, PNorm = 166.0896, GNorm = 0.5692, lr_0 = 4.7768e-04
Loss = 2.9357e-02, PNorm = 166.1241, GNorm = 0.2932, lr_0 = 4.7735e-04
Loss = 2.5349e-02, PNorm = 166.1575, GNorm = 0.4161, lr_0 = 4.7703e-04
Validation mae = 0.122306
Epoch 11
Loss = 1.9467e-02, PNorm = 166.1881, GNorm = 0.4419, lr_0 = 4.7670e-04
Loss = 1.8839e-02, PNorm = 166.2101, GNorm = 0.1866, lr_0 = 4.7637e-04
Loss = 2.3267e-02, PNorm = 166.2354, GNorm = 0.3593, lr_0 = 4.7605e-04
Loss = 2.2333e-02, PNorm = 166.2532, GNorm = 0.9597, lr_0 = 4.7572e-04
Loss = 2.4413e-02, PNorm = 166.2773, GNorm = 0.2967, lr_0 = 4.7539e-04
Loss = 2.6815e-02, PNorm = 166.3005, GNorm = 0.4895, lr_0 = 4.7507e-04
Loss = 2.4561e-02, PNorm = 166.3262, GNorm = 0.1655, lr_0 = 4.7474e-04
Loss = 2.7864e-02, PNorm = 166.3537, GNorm = 0.5286, lr_0 = 4.7442e-04
Loss = 2.8594e-02, PNorm = 166.3754, GNorm = 0.3432, lr_0 = 4.7409e-04
Loss = 2.6205e-02, PNorm = 166.3984, GNorm = 0.3582, lr_0 = 4.7377e-04
Loss = 2.2732e-02, PNorm = 166.4195, GNorm = 0.1925, lr_0 = 4.7344e-04
Loss = 2.2315e-02, PNorm = 166.4430, GNorm = 0.2472, lr_0 = 4.7312e-04
Loss = 2.0668e-02, PNorm = 166.4642, GNorm = 0.2182, lr_0 = 4.7279e-04
Loss = 3.0289e-02, PNorm = 166.4901, GNorm = 0.7420, lr_0 = 4.7247e-04
Loss = 2.2165e-02, PNorm = 166.5225, GNorm = 0.3402, lr_0 = 4.7215e-04
Loss = 2.5831e-02, PNorm = 166.5517, GNorm = 0.4038, lr_0 = 4.7182e-04
Loss = 2.0256e-02, PNorm = 166.5712, GNorm = 0.4620, lr_0 = 4.7150e-04
Loss = 1.8952e-02, PNorm = 166.5899, GNorm = 0.2964, lr_0 = 4.7118e-04
Loss = 2.1118e-02, PNorm = 166.6104, GNorm = 0.6551, lr_0 = 4.7085e-04
Loss = 1.9619e-02, PNorm = 166.6324, GNorm = 0.2345, lr_0 = 4.7053e-04
Loss = 2.3015e-02, PNorm = 166.6574, GNorm = 0.2707, lr_0 = 4.7021e-04
Loss = 2.0590e-02, PNorm = 166.6840, GNorm = 0.3826, lr_0 = 4.6989e-04
Loss = 2.5619e-02, PNorm = 166.7087, GNorm = 0.5898, lr_0 = 4.6957e-04
Loss = 2.6393e-02, PNorm = 166.7338, GNorm = 0.3455, lr_0 = 4.6924e-04
Loss = 2.1131e-02, PNorm = 166.7556, GNorm = 0.2612, lr_0 = 4.6892e-04
Loss = 1.8711e-02, PNorm = 166.7798, GNorm = 0.3176, lr_0 = 4.6860e-04
Loss = 2.6209e-02, PNorm = 166.8063, GNorm = 0.3921, lr_0 = 4.6828e-04
Loss = 2.8023e-02, PNorm = 166.8322, GNorm = 0.3487, lr_0 = 4.6796e-04
Loss = 2.4131e-02, PNorm = 166.8582, GNorm = 0.2737, lr_0 = 4.6764e-04
Loss = 2.0968e-02, PNorm = 166.8850, GNorm = 0.3634, lr_0 = 4.6732e-04
Loss = 2.2441e-02, PNorm = 166.9089, GNorm = 0.5272, lr_0 = 4.6700e-04
Loss = 2.5753e-02, PNorm = 166.9357, GNorm = 0.2971, lr_0 = 4.6668e-04
Loss = 2.0861e-02, PNorm = 166.9635, GNorm = 0.3182, lr_0 = 4.6636e-04
Loss = 2.4006e-02, PNorm = 166.9892, GNorm = 0.6759, lr_0 = 4.6604e-04
Loss = 2.0592e-02, PNorm = 167.0224, GNorm = 0.3355, lr_0 = 4.6572e-04
Loss = 2.2493e-02, PNorm = 167.0508, GNorm = 0.2346, lr_0 = 4.6540e-04
Loss = 3.2760e-02, PNorm = 167.0836, GNorm = 2.0773, lr_0 = 4.6508e-04
Loss = 2.1738e-02, PNorm = 167.1076, GNorm = 0.3686, lr_0 = 4.6476e-04
Loss = 1.7132e-02, PNorm = 167.1335, GNorm = 0.2900, lr_0 = 4.6445e-04
Loss = 2.3954e-02, PNorm = 167.1564, GNorm = 0.3693, lr_0 = 4.6413e-04
Loss = 2.5619e-02, PNorm = 167.1759, GNorm = 0.2056, lr_0 = 4.6381e-04
Loss = 2.2294e-02, PNorm = 167.1979, GNorm = 0.2015, lr_0 = 4.6349e-04
Loss = 2.2365e-02, PNorm = 167.2221, GNorm = 0.3109, lr_0 = 4.6317e-04
Loss = 1.7763e-02, PNorm = 167.2480, GNorm = 0.3428, lr_0 = 4.6286e-04
Loss = 2.0246e-02, PNorm = 167.2714, GNorm = 0.5295, lr_0 = 4.6254e-04
Loss = 1.9482e-02, PNorm = 167.2909, GNorm = 0.3448, lr_0 = 4.6222e-04
Loss = 1.9445e-02, PNorm = 167.3135, GNorm = 0.1869, lr_0 = 4.6191e-04
Loss = 2.8820e-02, PNorm = 167.3380, GNorm = 0.3453, lr_0 = 4.6159e-04
Loss = 1.6883e-02, PNorm = 167.3625, GNorm = 0.2702, lr_0 = 4.6127e-04
Loss = 2.1153e-02, PNorm = 167.3840, GNorm = 0.2744, lr_0 = 4.6096e-04
Loss = 1.8001e-02, PNorm = 167.4109, GNorm = 0.4250, lr_0 = 4.6064e-04
Loss = 3.2033e-02, PNorm = 167.4375, GNorm = 1.0357, lr_0 = 4.6033e-04
Loss = 3.9234e-02, PNorm = 167.4652, GNorm = 0.7745, lr_0 = 4.6001e-04
Loss = 3.2017e-02, PNorm = 167.4868, GNorm = 0.3602, lr_0 = 4.5970e-04
Loss = 2.0109e-02, PNorm = 167.5145, GNorm = 0.5055, lr_0 = 4.5938e-04
Loss = 2.2469e-02, PNorm = 167.5438, GNorm = 0.1912, lr_0 = 4.5907e-04
Loss = 2.4914e-02, PNorm = 167.5689, GNorm = 0.5950, lr_0 = 4.5875e-04
Loss = 1.7889e-02, PNorm = 167.5983, GNorm = 0.3368, lr_0 = 4.5844e-04
Loss = 2.5155e-02, PNorm = 167.6227, GNorm = 0.7715, lr_0 = 4.5812e-04
Loss = 2.0785e-02, PNorm = 167.6521, GNorm = 0.3731, lr_0 = 4.5781e-04
Loss = 1.8622e-02, PNorm = 167.6799, GNorm = 0.3884, lr_0 = 4.5750e-04
Loss = 2.5238e-02, PNorm = 167.7079, GNorm = 0.2881, lr_0 = 4.5718e-04
Loss = 1.9667e-02, PNorm = 167.7344, GNorm = 0.6184, lr_0 = 4.5687e-04
Loss = 2.0924e-02, PNorm = 167.7602, GNorm = 0.2860, lr_0 = 4.5656e-04
Loss = 2.3864e-02, PNorm = 167.7827, GNorm = 0.2248, lr_0 = 4.5624e-04
Loss = 2.0711e-02, PNorm = 167.8079, GNorm = 0.2182, lr_0 = 4.5593e-04
Loss = 2.6955e-02, PNorm = 167.8369, GNorm = 0.4506, lr_0 = 4.5562e-04
Loss = 2.8012e-02, PNorm = 167.8629, GNorm = 0.6594, lr_0 = 4.5531e-04
Loss = 2.9015e-02, PNorm = 167.8870, GNorm = 0.4855, lr_0 = 4.5499e-04
Loss = 1.8621e-02, PNorm = 167.9171, GNorm = 0.5871, lr_0 = 4.5468e-04
Loss = 2.1830e-02, PNorm = 167.9437, GNorm = 0.3034, lr_0 = 4.5437e-04
Loss = 2.2168e-02, PNorm = 167.9689, GNorm = 0.4545, lr_0 = 4.5406e-04
Loss = 2.2922e-02, PNorm = 167.9982, GNorm = 0.3651, lr_0 = 4.5375e-04
Loss = 2.0030e-02, PNorm = 168.0327, GNorm = 0.2126, lr_0 = 4.5344e-04
Loss = 2.5045e-02, PNorm = 168.0664, GNorm = 0.3535, lr_0 = 4.5313e-04
Loss = 2.3992e-02, PNorm = 168.0976, GNorm = 0.2588, lr_0 = 4.5282e-04
Loss = 2.0569e-02, PNorm = 168.1281, GNorm = 0.5361, lr_0 = 4.5251e-04
Loss = 2.1705e-02, PNorm = 168.1551, GNorm = 0.2460, lr_0 = 4.5220e-04
Loss = 2.7116e-02, PNorm = 168.1821, GNorm = 0.3674, lr_0 = 4.5189e-04
Loss = 2.7148e-02, PNorm = 168.2095, GNorm = 0.4077, lr_0 = 4.5158e-04
Loss = 2.3501e-02, PNorm = 168.2383, GNorm = 0.2946, lr_0 = 4.5127e-04
Loss = 2.0431e-02, PNorm = 168.2689, GNorm = 0.8672, lr_0 = 4.5096e-04
Loss = 1.9135e-02, PNorm = 168.2970, GNorm = 0.2641, lr_0 = 4.5065e-04
Loss = 2.0413e-02, PNorm = 168.3257, GNorm = 0.2317, lr_0 = 4.5034e-04
Loss = 2.3869e-02, PNorm = 168.3535, GNorm = 0.4862, lr_0 = 4.5003e-04
Loss = 2.8573e-02, PNorm = 168.3833, GNorm = 0.4615, lr_0 = 4.4972e-04
Loss = 2.3649e-02, PNorm = 168.4082, GNorm = 0.3059, lr_0 = 4.4942e-04
Loss = 1.8757e-02, PNorm = 168.4316, GNorm = 0.3197, lr_0 = 4.4911e-04
Loss = 2.2004e-02, PNorm = 168.4547, GNorm = 0.4477, lr_0 = 4.4880e-04
Loss = 1.9817e-02, PNorm = 168.4828, GNorm = 0.5051, lr_0 = 4.4849e-04
Loss = 2.5613e-02, PNorm = 168.5103, GNorm = 0.5091, lr_0 = 4.4819e-04
Loss = 2.1578e-02, PNorm = 168.5376, GNorm = 0.4152, lr_0 = 4.4788e-04
Loss = 2.0761e-02, PNorm = 168.5679, GNorm = 0.3733, lr_0 = 4.4757e-04
Loss = 2.7304e-02, PNorm = 168.5978, GNorm = 0.2976, lr_0 = 4.4727e-04
Loss = 2.6727e-02, PNorm = 168.6299, GNorm = 0.3043, lr_0 = 4.4696e-04
Loss = 1.9632e-02, PNorm = 168.6612, GNorm = 0.2648, lr_0 = 4.4665e-04
Loss = 2.0495e-02, PNorm = 168.6908, GNorm = 0.3953, lr_0 = 4.4635e-04
Loss = 1.7535e-02, PNorm = 168.7207, GNorm = 0.1695, lr_0 = 4.4604e-04
Loss = 2.1960e-02, PNorm = 168.7482, GNorm = 0.2086, lr_0 = 4.4574e-04
Loss = 2.3329e-02, PNorm = 168.7815, GNorm = 0.6845, lr_0 = 4.4543e-04
Loss = 2.1868e-02, PNorm = 168.8059, GNorm = 0.2194, lr_0 = 4.4513e-04
Loss = 2.0861e-02, PNorm = 168.8313, GNorm = 0.3603, lr_0 = 4.4482e-04
Loss = 2.0180e-02, PNorm = 168.8574, GNorm = 0.7697, lr_0 = 4.4452e-04
Loss = 2.0582e-02, PNorm = 168.8817, GNorm = 0.2682, lr_0 = 4.4421e-04
Loss = 1.8097e-02, PNorm = 168.9061, GNorm = 0.2110, lr_0 = 4.4391e-04
Loss = 3.0904e-02, PNorm = 168.9272, GNorm = 0.5152, lr_0 = 4.4360e-04
Loss = 2.5582e-02, PNorm = 168.9550, GNorm = 0.6245, lr_0 = 4.4330e-04
Loss = 1.9394e-02, PNorm = 168.9835, GNorm = 0.4100, lr_0 = 4.4299e-04
Loss = 2.2843e-02, PNorm = 169.0122, GNorm = 0.3659, lr_0 = 4.4269e-04
Loss = 2.0231e-02, PNorm = 169.0433, GNorm = 0.2473, lr_0 = 4.4239e-04
Loss = 2.5878e-02, PNorm = 169.0720, GNorm = 0.4563, lr_0 = 4.4209e-04
Loss = 2.9464e-02, PNorm = 169.1031, GNorm = 0.1844, lr_0 = 4.4178e-04
Loss = 2.0434e-02, PNorm = 169.1314, GNorm = 0.2654, lr_0 = 4.4148e-04
Loss = 2.3512e-02, PNorm = 169.1606, GNorm = 0.2095, lr_0 = 4.4118e-04
Loss = 1.7329e-02, PNorm = 169.1878, GNorm = 0.5129, lr_0 = 4.4088e-04
Loss = 3.1845e-02, PNorm = 169.2163, GNorm = 0.3547, lr_0 = 4.4057e-04
Loss = 1.9917e-02, PNorm = 169.2428, GNorm = 0.3497, lr_0 = 4.4027e-04
Loss = 2.3987e-02, PNorm = 169.2700, GNorm = 0.2256, lr_0 = 4.3997e-04
Loss = 2.8781e-02, PNorm = 169.2957, GNorm = 0.2190, lr_0 = 4.3967e-04
Loss = 2.3232e-02, PNorm = 169.3215, GNorm = 0.4423, lr_0 = 4.3937e-04
Validation mae = 0.121485
Epoch 12
Loss = 1.8139e-02, PNorm = 169.3467, GNorm = 0.1593, lr_0 = 4.3907e-04
Loss = 1.9417e-02, PNorm = 169.3706, GNorm = 0.3393, lr_0 = 4.3877e-04
Loss = 2.3521e-02, PNorm = 169.3909, GNorm = 0.3720, lr_0 = 4.3846e-04
Loss = 1.7991e-02, PNorm = 169.4133, GNorm = 0.3233, lr_0 = 4.3816e-04
Loss = 2.3985e-02, PNorm = 169.4357, GNorm = 0.3492, lr_0 = 4.3786e-04
Loss = 2.2297e-02, PNorm = 169.4540, GNorm = 0.4129, lr_0 = 4.3756e-04
Loss = 2.7101e-02, PNorm = 169.4782, GNorm = 0.5381, lr_0 = 4.3726e-04
Loss = 1.7275e-02, PNorm = 169.5002, GNorm = 0.5644, lr_0 = 4.3696e-04
Loss = 2.1096e-02, PNorm = 169.5233, GNorm = 0.1707, lr_0 = 4.3667e-04
Loss = 2.0377e-02, PNorm = 169.5444, GNorm = 0.3580, lr_0 = 4.3637e-04
Loss = 1.8473e-02, PNorm = 169.5671, GNorm = 0.2275, lr_0 = 4.3607e-04
Loss = 2.7348e-02, PNorm = 169.5847, GNorm = 0.3942, lr_0 = 4.3577e-04
Loss = 2.0063e-02, PNorm = 169.6019, GNorm = 0.2447, lr_0 = 4.3547e-04
Loss = 1.7687e-02, PNorm = 169.6181, GNorm = 0.3156, lr_0 = 4.3517e-04
Loss = 1.9576e-02, PNorm = 169.6362, GNorm = 0.2047, lr_0 = 4.3487e-04
Loss = 2.2246e-02, PNorm = 169.6547, GNorm = 0.3564, lr_0 = 4.3458e-04
Loss = 1.8309e-02, PNorm = 169.6746, GNorm = 0.5546, lr_0 = 4.3428e-04
Loss = 2.1537e-02, PNorm = 169.6997, GNorm = 0.1383, lr_0 = 4.3398e-04
Loss = 1.8667e-02, PNorm = 169.7195, GNorm = 0.2845, lr_0 = 4.3368e-04
Loss = 1.5108e-02, PNorm = 169.7391, GNorm = 0.1637, lr_0 = 4.3339e-04
Loss = 1.6329e-02, PNorm = 169.7586, GNorm = 0.1826, lr_0 = 4.3309e-04
Loss = 2.0040e-02, PNorm = 169.7821, GNorm = 0.3673, lr_0 = 4.3279e-04
Loss = 1.9385e-02, PNorm = 169.8031, GNorm = 0.5441, lr_0 = 4.3250e-04
Loss = 1.8074e-02, PNorm = 169.8250, GNorm = 0.2287, lr_0 = 4.3220e-04
Loss = 1.9553e-02, PNorm = 169.8435, GNorm = 0.2176, lr_0 = 4.3190e-04
Loss = 1.6658e-02, PNorm = 169.8641, GNorm = 0.2051, lr_0 = 4.3161e-04
Loss = 1.4950e-02, PNorm = 169.8857, GNorm = 0.6228, lr_0 = 4.3131e-04
Loss = 1.7982e-02, PNorm = 169.9042, GNorm = 0.2622, lr_0 = 4.3102e-04
Loss = 2.0914e-02, PNorm = 169.9206, GNorm = 0.2284, lr_0 = 4.3072e-04
Loss = 2.2692e-02, PNorm = 169.9348, GNorm = 0.5260, lr_0 = 4.3043e-04
Loss = 1.6087e-02, PNorm = 169.9558, GNorm = 0.2402, lr_0 = 4.3013e-04
Loss = 2.4182e-02, PNorm = 169.9760, GNorm = 0.3563, lr_0 = 4.2984e-04
Loss = 2.4717e-02, PNorm = 169.9995, GNorm = 0.2031, lr_0 = 4.2954e-04
Loss = 1.7219e-02, PNorm = 170.0217, GNorm = 0.3120, lr_0 = 4.2925e-04
Loss = 1.6003e-02, PNorm = 170.0448, GNorm = 0.1985, lr_0 = 4.2895e-04
Loss = 1.5281e-02, PNorm = 170.0652, GNorm = 0.1502, lr_0 = 4.2866e-04
Loss = 1.4575e-02, PNorm = 170.0836, GNorm = 0.6130, lr_0 = 4.2837e-04
Loss = 2.3385e-02, PNorm = 170.1073, GNorm = 0.3566, lr_0 = 4.2807e-04
Loss = 2.1521e-02, PNorm = 170.1335, GNorm = 0.2523, lr_0 = 4.2778e-04
Loss = 2.1748e-02, PNorm = 170.1536, GNorm = 0.1782, lr_0 = 4.2749e-04
Loss = 1.8871e-02, PNorm = 170.1727, GNorm = 0.4750, lr_0 = 4.2719e-04
Loss = 1.5307e-02, PNorm = 170.1914, GNorm = 0.2082, lr_0 = 4.2690e-04
Loss = 1.6158e-02, PNorm = 170.2129, GNorm = 0.2216, lr_0 = 4.2661e-04
Loss = 1.6629e-02, PNorm = 170.2327, GNorm = 0.3413, lr_0 = 4.2632e-04
Loss = 1.6227e-02, PNorm = 170.2498, GNorm = 0.3465, lr_0 = 4.2602e-04
Loss = 1.9713e-02, PNorm = 170.2668, GNorm = 0.3408, lr_0 = 4.2573e-04
Loss = 2.1778e-02, PNorm = 170.2852, GNorm = 0.4153, lr_0 = 4.2544e-04
Loss = 1.7731e-02, PNorm = 170.3108, GNorm = 0.2858, lr_0 = 4.2515e-04
Loss = 1.5941e-02, PNorm = 170.3357, GNorm = 0.4127, lr_0 = 4.2486e-04
Loss = 2.1987e-02, PNorm = 170.3546, GNorm = 0.1994, lr_0 = 4.2457e-04
Loss = 2.1884e-02, PNorm = 170.3801, GNorm = 0.4000, lr_0 = 4.2428e-04
Loss = 1.9928e-02, PNorm = 170.4056, GNorm = 0.5397, lr_0 = 4.2399e-04
Loss = 1.8665e-02, PNorm = 170.4282, GNorm = 0.3760, lr_0 = 4.2370e-04
Loss = 3.5872e-02, PNorm = 170.4576, GNorm = 0.3746, lr_0 = 4.2340e-04
Loss = 2.6451e-02, PNorm = 170.4773, GNorm = 0.8710, lr_0 = 4.2311e-04
Loss = 1.9283e-02, PNorm = 170.5028, GNorm = 0.7212, lr_0 = 4.2283e-04
Loss = 1.8211e-02, PNorm = 170.5257, GNorm = 0.2228, lr_0 = 4.2254e-04
Loss = 2.2368e-02, PNorm = 170.5505, GNorm = 0.6426, lr_0 = 4.2225e-04
Loss = 1.3501e-02, PNorm = 170.5735, GNorm = 0.3031, lr_0 = 4.2196e-04
Loss = 1.8774e-02, PNorm = 170.5952, GNorm = 0.1481, lr_0 = 4.2167e-04
Loss = 2.9588e-02, PNorm = 170.6168, GNorm = 0.7037, lr_0 = 4.2138e-04
Loss = 2.0956e-02, PNorm = 170.6344, GNorm = 0.2246, lr_0 = 4.2109e-04
Loss = 1.9406e-02, PNorm = 170.6583, GNorm = 0.3022, lr_0 = 4.2080e-04
Loss = 1.7051e-02, PNorm = 170.6849, GNorm = 0.1849, lr_0 = 4.2051e-04
Loss = 1.7341e-02, PNorm = 170.7102, GNorm = 0.2021, lr_0 = 4.2023e-04
Loss = 1.9353e-02, PNorm = 170.7374, GNorm = 0.4652, lr_0 = 4.1994e-04
Loss = 3.4070e-02, PNorm = 170.7632, GNorm = 0.4408, lr_0 = 4.1965e-04
Loss = 2.1341e-02, PNorm = 170.7866, GNorm = 0.7130, lr_0 = 4.1936e-04
Loss = 1.7636e-02, PNorm = 170.8087, GNorm = 0.2054, lr_0 = 4.1907e-04
Loss = 1.8043e-02, PNorm = 170.8343, GNorm = 0.3323, lr_0 = 4.1879e-04
Loss = 1.4127e-02, PNorm = 170.8618, GNorm = 0.2964, lr_0 = 4.1850e-04
Loss = 2.4775e-02, PNorm = 170.8883, GNorm = 0.7847, lr_0 = 4.1821e-04
Loss = 2.4772e-02, PNorm = 170.9237, GNorm = 0.4306, lr_0 = 4.1793e-04
Loss = 1.5870e-02, PNorm = 170.9513, GNorm = 0.2821, lr_0 = 4.1764e-04
Loss = 2.1053e-02, PNorm = 170.9808, GNorm = 0.2691, lr_0 = 4.1736e-04
Loss = 2.3510e-02, PNorm = 171.0045, GNorm = 0.2231, lr_0 = 4.1707e-04
Loss = 1.9635e-02, PNorm = 171.0265, GNorm = 0.4347, lr_0 = 4.1678e-04
Loss = 2.1560e-02, PNorm = 171.0539, GNorm = 0.6677, lr_0 = 4.1650e-04
Loss = 2.1222e-02, PNorm = 171.0776, GNorm = 0.8273, lr_0 = 4.1621e-04
Loss = 1.6684e-02, PNorm = 171.1030, GNorm = 0.2856, lr_0 = 4.1593e-04
Loss = 2.2811e-02, PNorm = 171.1251, GNorm = 0.2760, lr_0 = 4.1564e-04
Loss = 1.9497e-02, PNorm = 171.1511, GNorm = 0.5336, lr_0 = 4.1536e-04
Loss = 1.9080e-02, PNorm = 171.1802, GNorm = 0.3436, lr_0 = 4.1507e-04
Loss = 1.6749e-02, PNorm = 171.2068, GNorm = 0.6010, lr_0 = 4.1479e-04
Loss = 1.6207e-02, PNorm = 171.2280, GNorm = 0.5042, lr_0 = 4.1450e-04
Loss = 2.1920e-02, PNorm = 171.2507, GNorm = 0.5876, lr_0 = 4.1422e-04
Loss = 2.8462e-02, PNorm = 171.2744, GNorm = 0.3008, lr_0 = 4.1394e-04
Loss = 1.5939e-02, PNorm = 171.2960, GNorm = 0.3948, lr_0 = 4.1365e-04
Loss = 2.0246e-02, PNorm = 171.3178, GNorm = 0.3371, lr_0 = 4.1337e-04
Loss = 1.9969e-02, PNorm = 171.3409, GNorm = 0.2168, lr_0 = 4.1309e-04
Loss = 1.6165e-02, PNorm = 171.3662, GNorm = 0.3714, lr_0 = 4.1280e-04
Loss = 2.6684e-02, PNorm = 171.3890, GNorm = 0.3197, lr_0 = 4.1252e-04
Loss = 1.8277e-02, PNorm = 171.4192, GNorm = 0.5541, lr_0 = 4.1224e-04
Loss = 2.1083e-02, PNorm = 171.4441, GNorm = 0.5071, lr_0 = 4.1196e-04
Loss = 1.9027e-02, PNorm = 171.4690, GNorm = 0.7013, lr_0 = 4.1167e-04
Loss = 1.9279e-02, PNorm = 171.4960, GNorm = 0.4208, lr_0 = 4.1139e-04
Loss = 2.3428e-02, PNorm = 171.5195, GNorm = 0.2694, lr_0 = 4.1111e-04
Loss = 2.1269e-02, PNorm = 171.5466, GNorm = 0.1784, lr_0 = 4.1083e-04
Loss = 1.7228e-02, PNorm = 171.5707, GNorm = 0.2543, lr_0 = 4.1055e-04
Loss = 1.9460e-02, PNorm = 171.5932, GNorm = 0.5267, lr_0 = 4.1027e-04
Loss = 1.6869e-02, PNorm = 171.6200, GNorm = 0.1368, lr_0 = 4.0998e-04
Loss = 1.7854e-02, PNorm = 171.6458, GNorm = 0.1539, lr_0 = 4.0970e-04
Loss = 1.6702e-02, PNorm = 171.6705, GNorm = 0.3162, lr_0 = 4.0942e-04
Loss = 1.7713e-02, PNorm = 171.6953, GNorm = 0.2986, lr_0 = 4.0914e-04
Loss = 1.7276e-02, PNorm = 171.7213, GNorm = 0.4563, lr_0 = 4.0886e-04
Loss = 2.1926e-02, PNorm = 171.7442, GNorm = 0.2263, lr_0 = 4.0858e-04
Loss = 1.9728e-02, PNorm = 171.7695, GNorm = 0.3259, lr_0 = 4.0830e-04
Loss = 2.1839e-02, PNorm = 171.7950, GNorm = 0.1647, lr_0 = 4.0802e-04
Loss = 1.6558e-02, PNorm = 171.8187, GNorm = 0.2286, lr_0 = 4.0774e-04
Loss = 1.6050e-02, PNorm = 171.8431, GNorm = 0.2685, lr_0 = 4.0746e-04
Loss = 2.3862e-02, PNorm = 171.8664, GNorm = 0.2529, lr_0 = 4.0718e-04
Loss = 1.4673e-02, PNorm = 171.8895, GNorm = 0.3019, lr_0 = 4.0691e-04
Loss = 1.6402e-02, PNorm = 171.9118, GNorm = 0.2094, lr_0 = 4.0663e-04
Loss = 2.2786e-02, PNorm = 171.9366, GNorm = 0.6572, lr_0 = 4.0635e-04
Loss = 1.9808e-02, PNorm = 171.9591, GNorm = 0.6748, lr_0 = 4.0607e-04
Loss = 1.9434e-02, PNorm = 171.9833, GNorm = 0.5769, lr_0 = 4.0579e-04
Loss = 1.7810e-02, PNorm = 172.0076, GNorm = 0.3996, lr_0 = 4.0551e-04
Loss = 1.4137e-02, PNorm = 172.0286, GNorm = 0.1792, lr_0 = 4.0524e-04
Loss = 2.2126e-02, PNorm = 172.0532, GNorm = 0.2119, lr_0 = 4.0496e-04
Loss = 2.2090e-02, PNorm = 172.0744, GNorm = 0.6008, lr_0 = 4.0468e-04
Validation mae = 0.121790
Epoch 13
Loss = 2.3478e-02, PNorm = 172.0977, GNorm = 0.2172, lr_0 = 4.0440e-04
Loss = 1.8334e-02, PNorm = 172.1130, GNorm = 0.2234, lr_0 = 4.0413e-04
Loss = 1.9635e-02, PNorm = 172.1281, GNorm = 0.4653, lr_0 = 4.0385e-04
Loss = 2.0918e-02, PNorm = 172.1447, GNorm = 0.2944, lr_0 = 4.0357e-04
Loss = 1.5642e-02, PNorm = 172.1587, GNorm = 0.2023, lr_0 = 4.0330e-04
Loss = 1.7909e-02, PNorm = 172.1740, GNorm = 0.5275, lr_0 = 4.0302e-04
Loss = 1.5463e-02, PNorm = 172.1918, GNorm = 0.2405, lr_0 = 4.0274e-04
Loss = 1.5719e-02, PNorm = 172.2077, GNorm = 0.1550, lr_0 = 4.0247e-04
Loss = 1.6224e-02, PNorm = 172.2253, GNorm = 0.1684, lr_0 = 4.0219e-04
Loss = 1.6855e-02, PNorm = 172.2418, GNorm = 0.3405, lr_0 = 4.0192e-04
Loss = 1.5356e-02, PNorm = 172.2615, GNorm = 0.3977, lr_0 = 4.0164e-04
Loss = 2.3935e-02, PNorm = 172.2815, GNorm = 0.5424, lr_0 = 4.0137e-04
Loss = 1.7147e-02, PNorm = 172.3000, GNorm = 0.4365, lr_0 = 4.0109e-04
Loss = 1.3771e-02, PNorm = 172.3191, GNorm = 0.2777, lr_0 = 4.0082e-04
Loss = 1.6852e-02, PNorm = 172.3339, GNorm = 0.3719, lr_0 = 4.0054e-04
Loss = 1.6248e-02, PNorm = 172.3501, GNorm = 0.1988, lr_0 = 4.0027e-04
Loss = 1.7295e-02, PNorm = 172.3686, GNorm = 0.2115, lr_0 = 3.9999e-04
Loss = 1.4969e-02, PNorm = 172.3870, GNorm = 0.2284, lr_0 = 3.9972e-04
Loss = 1.4032e-02, PNorm = 172.4060, GNorm = 0.3794, lr_0 = 3.9945e-04
Loss = 1.7444e-02, PNorm = 172.4227, GNorm = 0.4020, lr_0 = 3.9917e-04
Loss = 1.4188e-02, PNorm = 172.4420, GNorm = 0.1725, lr_0 = 3.9890e-04
Loss = 2.5319e-02, PNorm = 172.4593, GNorm = 0.1830, lr_0 = 3.9863e-04
Loss = 1.9596e-02, PNorm = 172.4710, GNorm = 0.2057, lr_0 = 3.9835e-04
Loss = 1.9428e-02, PNorm = 172.4877, GNorm = 0.2352, lr_0 = 3.9808e-04
Loss = 1.5645e-02, PNorm = 172.5094, GNorm = 0.2832, lr_0 = 3.9781e-04
Loss = 2.0070e-02, PNorm = 172.5276, GNorm = 0.2600, lr_0 = 3.9753e-04
Loss = 1.7608e-02, PNorm = 172.5471, GNorm = 0.2618, lr_0 = 3.9726e-04
Loss = 3.4742e-02, PNorm = 172.5695, GNorm = 0.2706, lr_0 = 3.9699e-04
Loss = 1.4859e-02, PNorm = 172.5933, GNorm = 0.1803, lr_0 = 3.9672e-04
Loss = 1.6840e-02, PNorm = 172.6106, GNorm = 0.3559, lr_0 = 3.9645e-04
Loss = 2.8341e-02, PNorm = 172.6251, GNorm = 0.4951, lr_0 = 3.9617e-04
Loss = 2.0063e-02, PNorm = 172.6429, GNorm = 0.3084, lr_0 = 3.9590e-04
Loss = 1.8949e-02, PNorm = 172.6578, GNorm = 0.1301, lr_0 = 3.9563e-04
Loss = 1.2759e-02, PNorm = 172.6730, GNorm = 0.2180, lr_0 = 3.9536e-04
Loss = 2.0617e-02, PNorm = 172.6954, GNorm = 0.3374, lr_0 = 3.9509e-04
Loss = 1.5271e-02, PNorm = 172.7160, GNorm = 0.3499, lr_0 = 3.9482e-04
Loss = 1.8509e-02, PNorm = 172.7321, GNorm = 0.2280, lr_0 = 3.9455e-04
Loss = 1.2378e-02, PNorm = 172.7513, GNorm = 0.2222, lr_0 = 3.9428e-04
Loss = 1.5085e-02, PNorm = 172.7705, GNorm = 0.1674, lr_0 = 3.9401e-04
Loss = 1.4132e-02, PNorm = 172.7892, GNorm = 0.2933, lr_0 = 3.9374e-04
Loss = 1.8640e-02, PNorm = 172.8070, GNorm = 0.5466, lr_0 = 3.9347e-04
Loss = 1.3151e-02, PNorm = 172.8254, GNorm = 0.2270, lr_0 = 3.9320e-04
Loss = 1.3535e-02, PNorm = 172.8423, GNorm = 0.4437, lr_0 = 3.9293e-04
Loss = 1.4061e-02, PNorm = 172.8567, GNorm = 0.2571, lr_0 = 3.9266e-04
Loss = 1.4678e-02, PNorm = 172.8707, GNorm = 0.4038, lr_0 = 3.9239e-04
Loss = 1.3719e-02, PNorm = 172.8864, GNorm = 0.2078, lr_0 = 3.9212e-04
Loss = 1.4571e-02, PNorm = 172.9015, GNorm = 0.3455, lr_0 = 3.9185e-04
Loss = 1.6792e-02, PNorm = 172.9160, GNorm = 0.4250, lr_0 = 3.9159e-04
Loss = 1.6031e-02, PNorm = 172.9369, GNorm = 0.4606, lr_0 = 3.9132e-04
Loss = 2.1220e-02, PNorm = 172.9563, GNorm = 0.2949, lr_0 = 3.9105e-04
Loss = 1.4006e-02, PNorm = 172.9746, GNorm = 0.1868, lr_0 = 3.9078e-04
Loss = 1.5573e-02, PNorm = 172.9936, GNorm = 0.1871, lr_0 = 3.9051e-04
Loss = 2.1547e-02, PNorm = 173.0132, GNorm = 0.4033, lr_0 = 3.9025e-04
Loss = 1.5090e-02, PNorm = 173.0308, GNorm = 0.1757, lr_0 = 3.8998e-04
Loss = 1.3922e-02, PNorm = 173.0508, GNorm = 0.1632, lr_0 = 3.8971e-04
Loss = 1.6286e-02, PNorm = 173.0706, GNorm = 0.2172, lr_0 = 3.8945e-04
Loss = 1.8552e-02, PNorm = 173.0877, GNorm = 0.2204, lr_0 = 3.8918e-04
Loss = 1.7182e-02, PNorm = 173.1047, GNorm = 0.2026, lr_0 = 3.8891e-04
Loss = 1.4999e-02, PNorm = 173.1269, GNorm = 0.1476, lr_0 = 3.8865e-04
Loss = 1.4257e-02, PNorm = 173.1472, GNorm = 0.2876, lr_0 = 3.8838e-04
Loss = 1.8152e-02, PNorm = 173.1648, GNorm = 0.5888, lr_0 = 3.8811e-04
Loss = 1.3979e-02, PNorm = 173.1829, GNorm = 0.1458, lr_0 = 3.8785e-04
Loss = 1.4736e-02, PNorm = 173.2002, GNorm = 0.3006, lr_0 = 3.8758e-04
Loss = 1.5055e-02, PNorm = 173.2204, GNorm = 0.2727, lr_0 = 3.8732e-04
Loss = 1.2552e-02, PNorm = 173.2402, GNorm = 0.1645, lr_0 = 3.8705e-04
Loss = 1.5500e-02, PNorm = 173.2535, GNorm = 0.1801, lr_0 = 3.8679e-04
Loss = 1.5154e-02, PNorm = 173.2704, GNorm = 0.1899, lr_0 = 3.8652e-04
Loss = 1.9610e-02, PNorm = 173.2911, GNorm = 0.2976, lr_0 = 3.8626e-04
Loss = 1.3886e-02, PNorm = 173.3108, GNorm = 0.3238, lr_0 = 3.8599e-04
Loss = 1.8726e-02, PNorm = 173.3318, GNorm = 0.3449, lr_0 = 3.8573e-04
Loss = 1.4630e-02, PNorm = 173.3480, GNorm = 0.3554, lr_0 = 3.8546e-04
Loss = 1.4955e-02, PNorm = 173.3635, GNorm = 0.2147, lr_0 = 3.8520e-04
Loss = 2.1991e-02, PNorm = 173.3805, GNorm = 0.2349, lr_0 = 3.8493e-04
Loss = 1.3667e-02, PNorm = 173.3989, GNorm = 0.1323, lr_0 = 3.8467e-04
Loss = 2.3118e-02, PNorm = 173.4194, GNorm = 0.4122, lr_0 = 3.8441e-04
Loss = 1.5159e-02, PNorm = 173.4367, GNorm = 0.2132, lr_0 = 3.8414e-04
Loss = 1.4318e-02, PNorm = 173.4561, GNorm = 0.3831, lr_0 = 3.8388e-04
Loss = 1.3440e-02, PNorm = 173.4746, GNorm = 0.1697, lr_0 = 3.8362e-04
Loss = 1.5986e-02, PNorm = 173.4993, GNorm = 0.3559, lr_0 = 3.8336e-04
Loss = 1.5464e-02, PNorm = 173.5196, GNorm = 0.1623, lr_0 = 3.8309e-04
Loss = 1.2649e-02, PNorm = 173.5376, GNorm = 0.4618, lr_0 = 3.8283e-04
Loss = 1.4134e-02, PNorm = 173.5523, GNorm = 0.4995, lr_0 = 3.8257e-04
Loss = 1.6304e-02, PNorm = 173.5670, GNorm = 0.3962, lr_0 = 3.8231e-04
Loss = 1.4251e-02, PNorm = 173.5850, GNorm = 0.3477, lr_0 = 3.8204e-04
Loss = 1.3143e-02, PNorm = 173.6034, GNorm = 0.2807, lr_0 = 3.8178e-04
Loss = 1.5758e-02, PNorm = 173.6165, GNorm = 0.1554, lr_0 = 3.8152e-04
Loss = 1.5176e-02, PNorm = 173.6342, GNorm = 0.3543, lr_0 = 3.8126e-04
Loss = 1.3264e-02, PNorm = 173.6523, GNorm = 0.2210, lr_0 = 3.8100e-04
Loss = 1.3635e-02, PNorm = 173.6702, GNorm = 0.2610, lr_0 = 3.8074e-04
Loss = 2.0214e-02, PNorm = 173.6857, GNorm = 0.2612, lr_0 = 3.8048e-04
Loss = 1.5594e-02, PNorm = 173.7048, GNorm = 0.3277, lr_0 = 3.8022e-04
Loss = 1.6218e-02, PNorm = 173.7263, GNorm = 0.3113, lr_0 = 3.7995e-04
Loss = 1.7332e-02, PNorm = 173.7460, GNorm = 0.2153, lr_0 = 3.7969e-04
Loss = 1.4767e-02, PNorm = 173.7642, GNorm = 0.6056, lr_0 = 3.7943e-04
Loss = 1.7111e-02, PNorm = 173.7832, GNorm = 0.2709, lr_0 = 3.7917e-04
Loss = 1.5561e-02, PNorm = 173.8036, GNorm = 0.3427, lr_0 = 3.7891e-04
Loss = 1.5231e-02, PNorm = 173.8209, GNorm = 0.3567, lr_0 = 3.7866e-04
Loss = 1.8064e-02, PNorm = 173.8391, GNorm = 0.2592, lr_0 = 3.7840e-04
Loss = 2.4944e-02, PNorm = 173.8569, GNorm = 0.3003, lr_0 = 3.7814e-04
Loss = 1.5203e-02, PNorm = 173.8783, GNorm = 0.2161, lr_0 = 3.7788e-04
Loss = 1.8368e-02, PNorm = 173.8963, GNorm = 0.2234, lr_0 = 3.7762e-04
Loss = 1.6169e-02, PNorm = 173.9163, GNorm = 0.2894, lr_0 = 3.7736e-04
Loss = 1.8080e-02, PNorm = 173.9401, GNorm = 0.2217, lr_0 = 3.7710e-04
Loss = 1.5451e-02, PNorm = 173.9634, GNorm = 0.1899, lr_0 = 3.7684e-04
Loss = 1.4569e-02, PNorm = 173.9857, GNorm = 0.3947, lr_0 = 3.7659e-04
Loss = 2.0147e-02, PNorm = 174.0075, GNorm = 0.1994, lr_0 = 3.7633e-04
Loss = 1.9742e-02, PNorm = 174.0255, GNorm = 0.1785, lr_0 = 3.7607e-04
Loss = 1.6814e-02, PNorm = 174.0443, GNorm = 0.3403, lr_0 = 3.7581e-04
Loss = 2.1477e-02, PNorm = 174.0652, GNorm = 0.4089, lr_0 = 3.7555e-04
Loss = 1.6627e-02, PNorm = 174.0896, GNorm = 0.4470, lr_0 = 3.7530e-04
Loss = 1.4902e-02, PNorm = 174.1114, GNorm = 0.2140, lr_0 = 3.7504e-04
Loss = 1.7566e-02, PNorm = 174.1319, GNorm = 0.4225, lr_0 = 3.7478e-04
Loss = 1.5768e-02, PNorm = 174.1535, GNorm = 0.1624, lr_0 = 3.7453e-04
Loss = 1.6617e-02, PNorm = 174.1728, GNorm = 0.3716, lr_0 = 3.7427e-04
Loss = 1.5137e-02, PNorm = 174.1905, GNorm = 0.1818, lr_0 = 3.7401e-04
Loss = 1.4919e-02, PNorm = 174.2090, GNorm = 0.3795, lr_0 = 3.7376e-04
Loss = 1.4522e-02, PNorm = 174.2273, GNorm = 0.6654, lr_0 = 3.7350e-04
Loss = 1.8350e-02, PNorm = 174.2479, GNorm = 0.2097, lr_0 = 3.7325e-04
Loss = 1.4467e-02, PNorm = 174.2676, GNorm = 0.3459, lr_0 = 3.7299e-04
Loss = 1.6229e-02, PNorm = 174.2849, GNorm = 0.1818, lr_0 = 3.7273e-04
Validation mae = 0.121396
Epoch 14
Loss = 1.2495e-02, PNorm = 174.3051, GNorm = 0.3036, lr_0 = 3.7248e-04
Loss = 1.5396e-02, PNorm = 174.3189, GNorm = 0.4762, lr_0 = 3.7222e-04
Loss = 1.3531e-02, PNorm = 174.3333, GNorm = 0.2019, lr_0 = 3.7197e-04
Loss = 1.9550e-02, PNorm = 174.3472, GNorm = 0.3947, lr_0 = 3.7171e-04
Loss = 1.4680e-02, PNorm = 174.3603, GNorm = 0.2060, lr_0 = 3.7146e-04
Loss = 1.7742e-02, PNorm = 174.3757, GNorm = 0.2597, lr_0 = 3.7120e-04
Loss = 1.8915e-02, PNorm = 174.3907, GNorm = 0.6911, lr_0 = 3.7095e-04
Loss = 1.8228e-02, PNorm = 174.4064, GNorm = 0.2793, lr_0 = 3.7070e-04
Loss = 1.8713e-02, PNorm = 174.4281, GNorm = 0.2683, lr_0 = 3.7044e-04
Loss = 2.6536e-02, PNorm = 174.4455, GNorm = 2.8844, lr_0 = 3.7019e-04
Loss = 1.2643e-02, PNorm = 174.4622, GNorm = 0.3778, lr_0 = 3.6993e-04
Loss = 1.7965e-02, PNorm = 174.4778, GNorm = 0.2003, lr_0 = 3.6968e-04
Loss = 1.2150e-02, PNorm = 174.4959, GNorm = 0.2249, lr_0 = 3.6943e-04
Loss = 1.3104e-02, PNorm = 174.5082, GNorm = 0.2389, lr_0 = 3.6917e-04
Loss = 1.4519e-02, PNorm = 174.5204, GNorm = 0.4431, lr_0 = 3.6892e-04
Loss = 1.3240e-02, PNorm = 174.5347, GNorm = 0.2616, lr_0 = 3.6867e-04
Loss = 1.3964e-02, PNorm = 174.5506, GNorm = 0.3567, lr_0 = 3.6842e-04
Loss = 1.2381e-02, PNorm = 174.5693, GNorm = 0.1830, lr_0 = 3.6816e-04
Loss = 1.4609e-02, PNorm = 174.5868, GNorm = 0.4584, lr_0 = 3.6791e-04
Loss = 1.4597e-02, PNorm = 174.6035, GNorm = 0.3017, lr_0 = 3.6766e-04
Loss = 1.2066e-02, PNorm = 174.6199, GNorm = 0.2661, lr_0 = 3.6741e-04
Loss = 1.1667e-02, PNorm = 174.6304, GNorm = 0.2532, lr_0 = 3.6716e-04
Loss = 1.3314e-02, PNorm = 174.6427, GNorm = 0.2561, lr_0 = 3.6690e-04
Loss = 1.6785e-02, PNorm = 174.6568, GNorm = 0.2527, lr_0 = 3.6665e-04
Loss = 1.4230e-02, PNorm = 174.6704, GNorm = 0.4204, lr_0 = 3.6640e-04
Loss = 1.5821e-02, PNorm = 174.6857, GNorm = 0.3051, lr_0 = 3.6615e-04
Loss = 1.7583e-02, PNorm = 174.6987, GNorm = 0.4190, lr_0 = 3.6590e-04
Loss = 1.1250e-02, PNorm = 174.7131, GNorm = 0.3321, lr_0 = 3.6565e-04
Loss = 1.3981e-02, PNorm = 174.7252, GNorm = 0.3880, lr_0 = 3.6540e-04
Loss = 1.2369e-02, PNorm = 174.7375, GNorm = 0.1740, lr_0 = 3.6515e-04
Loss = 1.0438e-02, PNorm = 174.7507, GNorm = 0.1428, lr_0 = 3.6490e-04
Loss = 1.2125e-02, PNorm = 174.7613, GNorm = 0.3432, lr_0 = 3.6465e-04
Loss = 1.0960e-02, PNorm = 174.7743, GNorm = 0.2012, lr_0 = 3.6440e-04
Loss = 1.0584e-02, PNorm = 174.7878, GNorm = 0.2218, lr_0 = 3.6415e-04
Loss = 1.2462e-02, PNorm = 174.8033, GNorm = 0.2335, lr_0 = 3.6390e-04
Loss = 1.2765e-02, PNorm = 174.8204, GNorm = 0.5092, lr_0 = 3.6365e-04
Loss = 1.2573e-02, PNorm = 174.8365, GNorm = 0.2433, lr_0 = 3.6340e-04
Loss = 1.8108e-02, PNorm = 174.8504, GNorm = 0.2069, lr_0 = 3.6315e-04
Loss = 1.1257e-02, PNorm = 174.8646, GNorm = 0.1809, lr_0 = 3.6290e-04
Loss = 1.8300e-02, PNorm = 174.8771, GNorm = 0.2034, lr_0 = 3.6266e-04
Loss = 1.5401e-02, PNorm = 174.8899, GNorm = 0.2184, lr_0 = 3.6241e-04
Loss = 1.3036e-02, PNorm = 174.9059, GNorm = 0.2773, lr_0 = 3.6216e-04
Loss = 1.1560e-02, PNorm = 174.9235, GNorm = 0.2109, lr_0 = 3.6191e-04
Loss = 1.3854e-02, PNorm = 174.9375, GNorm = 0.3921, lr_0 = 3.6166e-04
Loss = 1.0970e-02, PNorm = 174.9520, GNorm = 0.3187, lr_0 = 3.6141e-04
Loss = 1.1179e-02, PNorm = 174.9653, GNorm = 0.3247, lr_0 = 3.6117e-04
Loss = 1.0299e-02, PNorm = 174.9781, GNorm = 0.2081, lr_0 = 3.6092e-04
Loss = 1.4157e-02, PNorm = 174.9923, GNorm = 0.3290, lr_0 = 3.6067e-04
Loss = 1.3735e-02, PNorm = 175.0087, GNorm = 0.1843, lr_0 = 3.6043e-04
Loss = 1.3760e-02, PNorm = 175.0242, GNorm = 0.5328, lr_0 = 3.6018e-04
Loss = 1.2051e-02, PNorm = 175.0379, GNorm = 0.2040, lr_0 = 3.5993e-04
Loss = 1.3653e-02, PNorm = 175.0514, GNorm = 0.4048, lr_0 = 3.5969e-04
Loss = 1.0964e-02, PNorm = 175.0670, GNorm = 0.2337, lr_0 = 3.5944e-04
Loss = 1.1129e-02, PNorm = 175.0807, GNorm = 0.1508, lr_0 = 3.5919e-04
Loss = 1.2370e-02, PNorm = 175.0974, GNorm = 0.4671, lr_0 = 3.5895e-04
Loss = 1.2031e-02, PNorm = 175.1116, GNorm = 0.3747, lr_0 = 3.5870e-04
Loss = 2.3684e-02, PNorm = 175.1270, GNorm = 0.3623, lr_0 = 3.5845e-04
Loss = 1.3738e-02, PNorm = 175.1467, GNorm = 0.5734, lr_0 = 3.5821e-04
Loss = 1.7220e-02, PNorm = 175.1633, GNorm = 0.8344, lr_0 = 3.5796e-04
Loss = 1.4379e-02, PNorm = 175.1826, GNorm = 0.1923, lr_0 = 3.5772e-04
Loss = 1.0684e-02, PNorm = 175.2021, GNorm = 0.2433, lr_0 = 3.5747e-04
Loss = 1.1107e-02, PNorm = 175.2173, GNorm = 0.2869, lr_0 = 3.5723e-04
Loss = 1.2084e-02, PNorm = 175.2320, GNorm = 0.3111, lr_0 = 3.5698e-04
Loss = 1.1812e-02, PNorm = 175.2441, GNorm = 0.2309, lr_0 = 3.5674e-04
Loss = 1.5502e-02, PNorm = 175.2600, GNorm = 0.1981, lr_0 = 3.5650e-04
Loss = 1.4065e-02, PNorm = 175.2774, GNorm = 0.3371, lr_0 = 3.5625e-04
Loss = 1.2048e-02, PNorm = 175.2879, GNorm = 0.3803, lr_0 = 3.5601e-04
Loss = 1.1507e-02, PNorm = 175.3031, GNorm = 0.1504, lr_0 = 3.5576e-04
Loss = 1.3464e-02, PNorm = 175.3178, GNorm = 0.2043, lr_0 = 3.5552e-04
Loss = 1.0591e-02, PNorm = 175.3346, GNorm = 0.3237, lr_0 = 3.5528e-04
Loss = 2.2538e-02, PNorm = 175.3547, GNorm = 0.3667, lr_0 = 3.5503e-04
Loss = 1.9386e-02, PNorm = 175.3727, GNorm = 0.4659, lr_0 = 3.5479e-04
Loss = 1.4780e-02, PNorm = 175.3888, GNorm = 0.1467, lr_0 = 3.5455e-04
Loss = 1.2623e-02, PNorm = 175.4032, GNorm = 0.1652, lr_0 = 3.5430e-04
Loss = 1.4225e-02, PNorm = 175.4186, GNorm = 0.1864, lr_0 = 3.5406e-04
Loss = 1.5772e-02, PNorm = 175.4331, GNorm = 0.4767, lr_0 = 3.5382e-04
Loss = 1.5056e-02, PNorm = 175.4511, GNorm = 0.2512, lr_0 = 3.5358e-04
Loss = 1.6102e-02, PNorm = 175.4648, GNorm = 0.1770, lr_0 = 3.5333e-04
Loss = 1.3772e-02, PNorm = 175.4800, GNorm = 0.1464, lr_0 = 3.5309e-04
Loss = 1.1103e-02, PNorm = 175.4975, GNorm = 0.2105, lr_0 = 3.5285e-04
Loss = 1.2684e-02, PNorm = 175.5101, GNorm = 0.2191, lr_0 = 3.5261e-04
Loss = 1.2102e-02, PNorm = 175.5264, GNorm = 0.3609, lr_0 = 3.5237e-04
Loss = 1.1501e-02, PNorm = 175.5442, GNorm = 0.1880, lr_0 = 3.5212e-04
Loss = 1.3366e-02, PNorm = 175.5623, GNorm = 0.1219, lr_0 = 3.5188e-04
Loss = 1.4327e-02, PNorm = 175.5796, GNorm = 0.3176, lr_0 = 3.5164e-04
Loss = 1.5658e-02, PNorm = 175.5941, GNorm = 0.2444, lr_0 = 3.5140e-04
Loss = 1.0468e-02, PNorm = 175.6108, GNorm = 0.1978, lr_0 = 3.5116e-04
Loss = 1.6574e-02, PNorm = 175.6306, GNorm = 0.3699, lr_0 = 3.5092e-04
Loss = 1.5645e-02, PNorm = 175.6449, GNorm = 0.7085, lr_0 = 3.5068e-04
Loss = 1.4525e-02, PNorm = 175.6621, GNorm = 0.3402, lr_0 = 3.5044e-04
Loss = 1.2470e-02, PNorm = 175.6785, GNorm = 0.1941, lr_0 = 3.5020e-04
Loss = 2.2776e-02, PNorm = 175.6955, GNorm = 0.2303, lr_0 = 3.4996e-04
Loss = 1.2284e-02, PNorm = 175.7133, GNorm = 0.2122, lr_0 = 3.4972e-04
Loss = 1.8219e-02, PNorm = 175.7290, GNorm = 0.1838, lr_0 = 3.4948e-04
Loss = 1.4190e-02, PNorm = 175.7443, GNorm = 0.1577, lr_0 = 3.4924e-04
Loss = 1.2676e-02, PNorm = 175.7630, GNorm = 0.2494, lr_0 = 3.4900e-04
Loss = 1.4577e-02, PNorm = 175.7811, GNorm = 0.1548, lr_0 = 3.4876e-04
Loss = 1.1924e-02, PNorm = 175.7986, GNorm = 0.5012, lr_0 = 3.4852e-04
Loss = 1.2361e-02, PNorm = 175.8179, GNorm = 0.2772, lr_0 = 3.4828e-04
Loss = 1.1615e-02, PNorm = 175.8368, GNorm = 0.4891, lr_0 = 3.4805e-04
Loss = 2.2881e-02, PNorm = 175.8502, GNorm = 0.3357, lr_0 = 3.4781e-04
Loss = 1.3313e-02, PNorm = 175.8628, GNorm = 0.2632, lr_0 = 3.4757e-04
Loss = 1.3268e-02, PNorm = 175.8789, GNorm = 0.1845, lr_0 = 3.4733e-04
Loss = 1.3704e-02, PNorm = 175.8938, GNorm = 0.1813, lr_0 = 3.4709e-04
Loss = 1.1892e-02, PNorm = 175.9106, GNorm = 0.2108, lr_0 = 3.4686e-04
Loss = 1.6223e-02, PNorm = 175.9291, GNorm = 0.4458, lr_0 = 3.4662e-04
Loss = 2.5562e-02, PNorm = 175.9455, GNorm = 0.2073, lr_0 = 3.4638e-04
Loss = 1.0702e-02, PNorm = 175.9623, GNorm = 0.4486, lr_0 = 3.4614e-04
Loss = 1.2892e-02, PNorm = 175.9804, GNorm = 0.1822, lr_0 = 3.4591e-04
Loss = 1.7321e-02, PNorm = 176.0013, GNorm = 0.2084, lr_0 = 3.4567e-04
Loss = 1.5141e-02, PNorm = 176.0212, GNorm = 0.2161, lr_0 = 3.4543e-04
Loss = 1.4023e-02, PNorm = 176.0387, GNorm = 0.3610, lr_0 = 3.4520e-04
Loss = 1.6696e-02, PNorm = 176.0544, GNorm = 0.3126, lr_0 = 3.4496e-04
Loss = 1.3412e-02, PNorm = 176.0702, GNorm = 0.2147, lr_0 = 3.4472e-04
Loss = 1.5717e-02, PNorm = 176.0883, GNorm = 0.2310, lr_0 = 3.4449e-04
Loss = 1.3635e-02, PNorm = 176.1042, GNorm = 0.2018, lr_0 = 3.4425e-04
Loss = 1.5569e-02, PNorm = 176.1207, GNorm = 0.1749, lr_0 = 3.4402e-04
Loss = 1.3219e-02, PNorm = 176.1368, GNorm = 0.3986, lr_0 = 3.4378e-04
Loss = 1.4386e-02, PNorm = 176.1548, GNorm = 0.2812, lr_0 = 3.4354e-04
Loss = 1.5979e-02, PNorm = 176.1722, GNorm = 0.2423, lr_0 = 3.4331e-04
Validation mae = 0.121310
Epoch 15
Loss = 1.1019e-02, PNorm = 176.1873, GNorm = 0.5491, lr_0 = 3.4307e-04
Loss = 1.0851e-02, PNorm = 176.1978, GNorm = 0.1422, lr_0 = 3.4284e-04
Loss = 1.2642e-02, PNorm = 176.2090, GNorm = 0.4066, lr_0 = 3.4260e-04
Loss = 1.2916e-02, PNorm = 176.2200, GNorm = 0.3658, lr_0 = 3.4237e-04
Loss = 1.3663e-02, PNorm = 176.2313, GNorm = 0.2789, lr_0 = 3.4213e-04
Loss = 1.4192e-02, PNorm = 176.2423, GNorm = 0.2228, lr_0 = 3.4190e-04
Loss = 8.9311e-03, PNorm = 176.2555, GNorm = 0.4461, lr_0 = 3.4167e-04
Loss = 1.0136e-02, PNorm = 176.2687, GNorm = 0.2291, lr_0 = 3.4143e-04
Loss = 1.8003e-02, PNorm = 176.2801, GNorm = 0.2816, lr_0 = 3.4120e-04
Loss = 1.1796e-02, PNorm = 176.2895, GNorm = 0.4778, lr_0 = 3.4096e-04
Loss = 1.1616e-02, PNorm = 176.3044, GNorm = 0.5337, lr_0 = 3.4073e-04
Loss = 1.3513e-02, PNorm = 176.3137, GNorm = 0.2131, lr_0 = 3.4050e-04
Loss = 1.1926e-02, PNorm = 176.3246, GNorm = 0.2957, lr_0 = 3.4026e-04
Loss = 1.2733e-02, PNorm = 176.3348, GNorm = 0.2885, lr_0 = 3.4003e-04
Loss = 9.0593e-03, PNorm = 176.3469, GNorm = 0.1135, lr_0 = 3.3980e-04
Loss = 1.9621e-02, PNorm = 176.3617, GNorm = 0.6754, lr_0 = 3.3956e-04
Loss = 1.8741e-02, PNorm = 176.3764, GNorm = 0.1181, lr_0 = 3.3933e-04
Loss = 1.3095e-02, PNorm = 176.3895, GNorm = 0.4836, lr_0 = 3.3910e-04
Loss = 1.3387e-02, PNorm = 176.3961, GNorm = 0.1775, lr_0 = 3.3887e-04
Loss = 1.5551e-02, PNorm = 176.4081, GNorm = 0.4080, lr_0 = 3.3864e-04
Loss = 1.0356e-02, PNorm = 176.4226, GNorm = 0.2584, lr_0 = 3.3840e-04
Loss = 1.0268e-02, PNorm = 176.4357, GNorm = 0.2822, lr_0 = 3.3817e-04
Loss = 9.8839e-03, PNorm = 176.4493, GNorm = 0.2238, lr_0 = 3.3794e-04
Loss = 1.1562e-02, PNorm = 176.4640, GNorm = 0.3986, lr_0 = 3.3771e-04
Loss = 1.3760e-02, PNorm = 176.4806, GNorm = 0.3039, lr_0 = 3.3748e-04
Loss = 1.2956e-02, PNorm = 176.4931, GNorm = 0.1717, lr_0 = 3.3725e-04
Loss = 1.0114e-02, PNorm = 176.5057, GNorm = 0.3993, lr_0 = 3.3701e-04
Loss = 8.6179e-03, PNorm = 176.5145, GNorm = 0.1435, lr_0 = 3.3678e-04
Loss = 1.8956e-02, PNorm = 176.5227, GNorm = 0.5196, lr_0 = 3.3655e-04
Loss = 9.2897e-03, PNorm = 176.5347, GNorm = 0.1510, lr_0 = 3.3632e-04
Loss = 1.1336e-02, PNorm = 176.5467, GNorm = 0.1885, lr_0 = 3.3609e-04
Loss = 1.4408e-02, PNorm = 176.5565, GNorm = 0.2736, lr_0 = 3.3586e-04
Loss = 1.0879e-02, PNorm = 176.5690, GNorm = 0.2523, lr_0 = 3.3563e-04
Loss = 1.1141e-02, PNorm = 176.5833, GNorm = 0.1822, lr_0 = 3.3540e-04
Loss = 1.0375e-02, PNorm = 176.5950, GNorm = 0.1921, lr_0 = 3.3517e-04
Loss = 1.0994e-02, PNorm = 176.6068, GNorm = 0.1528, lr_0 = 3.3494e-04
Loss = 1.4815e-02, PNorm = 176.6168, GNorm = 0.5179, lr_0 = 3.3471e-04
Loss = 1.7602e-02, PNorm = 176.6310, GNorm = 0.1535, lr_0 = 3.3448e-04
Loss = 9.2691e-03, PNorm = 176.6473, GNorm = 0.2393, lr_0 = 3.3425e-04
Loss = 9.0613e-03, PNorm = 176.6611, GNorm = 0.1869, lr_0 = 3.3403e-04
Loss = 1.2129e-02, PNorm = 176.6713, GNorm = 0.3088, lr_0 = 3.3380e-04
Loss = 1.2938e-02, PNorm = 176.6845, GNorm = 0.4137, lr_0 = 3.3357e-04
Loss = 1.5815e-02, PNorm = 176.6993, GNorm = 0.2587, lr_0 = 3.3334e-04
Loss = 1.0812e-02, PNorm = 176.7119, GNorm = 0.4046, lr_0 = 3.3311e-04
Loss = 1.1202e-02, PNorm = 176.7252, GNorm = 0.2570, lr_0 = 3.3288e-04
Loss = 9.4665e-03, PNorm = 176.7366, GNorm = 0.3106, lr_0 = 3.3265e-04
Loss = 1.0075e-02, PNorm = 176.7463, GNorm = 0.1808, lr_0 = 3.3243e-04
Loss = 1.3884e-02, PNorm = 176.7540, GNorm = 0.2303, lr_0 = 3.3220e-04
Loss = 1.1109e-02, PNorm = 176.7663, GNorm = 0.3063, lr_0 = 3.3197e-04
Loss = 1.2667e-02, PNorm = 176.7785, GNorm = 0.1733, lr_0 = 3.3174e-04
Loss = 1.3951e-02, PNorm = 176.7924, GNorm = 0.2743, lr_0 = 3.3152e-04
Loss = 1.1950e-02, PNorm = 176.8074, GNorm = 0.4485, lr_0 = 3.3129e-04
Loss = 9.5898e-03, PNorm = 176.8224, GNorm = 0.3812, lr_0 = 3.3106e-04
Loss = 1.0881e-02, PNorm = 176.8365, GNorm = 0.3361, lr_0 = 3.3084e-04
Loss = 8.9598e-03, PNorm = 176.8489, GNorm = 0.1388, lr_0 = 3.3061e-04
Loss = 9.5133e-03, PNorm = 176.8604, GNorm = 0.1399, lr_0 = 3.3038e-04
Loss = 1.0022e-02, PNorm = 176.8706, GNorm = 0.2843, lr_0 = 3.3016e-04
Loss = 1.6960e-02, PNorm = 176.8801, GNorm = 0.1774, lr_0 = 3.2993e-04
Loss = 1.2803e-02, PNorm = 176.8925, GNorm = 0.3056, lr_0 = 3.2970e-04
Loss = 9.5931e-03, PNorm = 176.9052, GNorm = 0.2035, lr_0 = 3.2948e-04
Loss = 1.4755e-02, PNorm = 176.9179, GNorm = 0.2994, lr_0 = 3.2925e-04
Loss = 1.2330e-02, PNorm = 176.9331, GNorm = 0.3693, lr_0 = 3.2903e-04
Loss = 1.0199e-02, PNorm = 176.9464, GNorm = 0.2232, lr_0 = 3.2880e-04
Loss = 1.2536e-02, PNorm = 176.9573, GNorm = 0.1388, lr_0 = 3.2858e-04
Loss = 3.2559e-02, PNorm = 176.9740, GNorm = 0.7181, lr_0 = 3.2835e-04
Loss = 1.1199e-02, PNorm = 176.9865, GNorm = 0.2189, lr_0 = 3.2813e-04
Loss = 8.9628e-03, PNorm = 177.0024, GNorm = 0.3679, lr_0 = 3.2790e-04
Loss = 8.3585e-03, PNorm = 177.0163, GNorm = 0.2049, lr_0 = 3.2768e-04
Loss = 1.1436e-02, PNorm = 177.0275, GNorm = 0.1910, lr_0 = 3.2745e-04
Loss = 1.5546e-02, PNorm = 177.0374, GNorm = 0.2408, lr_0 = 3.2723e-04
Loss = 1.4487e-02, PNorm = 177.0525, GNorm = 0.4230, lr_0 = 3.2700e-04
Loss = 1.1997e-02, PNorm = 177.0650, GNorm = 0.2269, lr_0 = 3.2678e-04
Loss = 1.2735e-02, PNorm = 177.0769, GNorm = 0.2311, lr_0 = 3.2656e-04
Loss = 1.3179e-02, PNorm = 177.0880, GNorm = 0.1260, lr_0 = 3.2633e-04
Loss = 1.0580e-02, PNorm = 177.1003, GNorm = 0.3581, lr_0 = 3.2611e-04
Loss = 1.0625e-02, PNorm = 177.1154, GNorm = 0.1631, lr_0 = 3.2589e-04
Loss = 1.0031e-02, PNorm = 177.1304, GNorm = 0.1465, lr_0 = 3.2566e-04
Loss = 1.1356e-02, PNorm = 177.1467, GNorm = 0.2848, lr_0 = 3.2544e-04
Loss = 1.0502e-02, PNorm = 177.1615, GNorm = 0.1539, lr_0 = 3.2522e-04
Loss = 8.7946e-03, PNorm = 177.1734, GNorm = 0.1441, lr_0 = 3.2499e-04
Loss = 1.2828e-02, PNorm = 177.1839, GNorm = 0.2698, lr_0 = 3.2477e-04
Loss = 1.0900e-02, PNorm = 177.1961, GNorm = 0.3851, lr_0 = 3.2455e-04
Loss = 1.0182e-02, PNorm = 177.2089, GNorm = 0.1675, lr_0 = 3.2433e-04
Loss = 9.2921e-03, PNorm = 177.2219, GNorm = 0.2979, lr_0 = 3.2410e-04
Loss = 1.2805e-02, PNorm = 177.2333, GNorm = 0.1862, lr_0 = 3.2388e-04
Loss = 9.3534e-03, PNorm = 177.2481, GNorm = 0.2826, lr_0 = 3.2366e-04
Loss = 1.1534e-02, PNorm = 177.2632, GNorm = 0.3938, lr_0 = 3.2344e-04
Loss = 9.7274e-03, PNorm = 177.2759, GNorm = 0.5162, lr_0 = 3.2322e-04
Loss = 1.3324e-02, PNorm = 177.2863, GNorm = 0.3779, lr_0 = 3.2300e-04
Loss = 1.4969e-02, PNorm = 177.2974, GNorm = 0.3012, lr_0 = 3.2277e-04
Loss = 1.2241e-02, PNorm = 177.3119, GNorm = 0.3744, lr_0 = 3.2255e-04
Loss = 1.0197e-02, PNorm = 177.3255, GNorm = 0.2958, lr_0 = 3.2233e-04
Loss = 1.3702e-02, PNorm = 177.3409, GNorm = 0.4726, lr_0 = 3.2211e-04
Loss = 1.0187e-02, PNorm = 177.3539, GNorm = 0.2869, lr_0 = 3.2189e-04
Loss = 1.4601e-02, PNorm = 177.3650, GNorm = 0.2191, lr_0 = 3.2167e-04
Loss = 1.5063e-02, PNorm = 177.3760, GNorm = 0.1731, lr_0 = 3.2145e-04
Loss = 1.0861e-02, PNorm = 177.3914, GNorm = 0.3045, lr_0 = 3.2123e-04
Loss = 1.2007e-02, PNorm = 177.4084, GNorm = 0.5723, lr_0 = 3.2101e-04
Loss = 1.1343e-02, PNorm = 177.4248, GNorm = 0.1591, lr_0 = 3.2079e-04
Loss = 1.1824e-02, PNorm = 177.4364, GNorm = 0.1799, lr_0 = 3.2057e-04
Loss = 9.9018e-03, PNorm = 177.4459, GNorm = 0.1679, lr_0 = 3.2035e-04
Loss = 1.3747e-02, PNorm = 177.4590, GNorm = 0.5282, lr_0 = 3.2013e-04
Loss = 1.1176e-02, PNorm = 177.4727, GNorm = 0.3156, lr_0 = 3.1991e-04
Loss = 1.3218e-02, PNorm = 177.4907, GNorm = 0.3578, lr_0 = 3.1969e-04
Loss = 8.5960e-03, PNorm = 177.5054, GNorm = 0.3201, lr_0 = 3.1947e-04
Loss = 1.2053e-02, PNorm = 177.5178, GNorm = 0.2671, lr_0 = 3.1925e-04
Loss = 7.8951e-03, PNorm = 177.5296, GNorm = 0.1596, lr_0 = 3.1904e-04
Loss = 1.2615e-02, PNorm = 177.5419, GNorm = 0.5216, lr_0 = 3.1882e-04
Loss = 1.1974e-02, PNorm = 177.5575, GNorm = 0.2061, lr_0 = 3.1860e-04
Loss = 1.1541e-02, PNorm = 177.5728, GNorm = 0.2270, lr_0 = 3.1838e-04
Loss = 8.7181e-03, PNorm = 177.5881, GNorm = 0.1583, lr_0 = 3.1816e-04
Loss = 1.3352e-02, PNorm = 177.6034, GNorm = 0.1450, lr_0 = 3.1794e-04
Loss = 1.0504e-02, PNorm = 177.6170, GNorm = 0.2951, lr_0 = 3.1773e-04
Loss = 1.1797e-02, PNorm = 177.6308, GNorm = 0.4781, lr_0 = 3.1751e-04
Loss = 1.5567e-02, PNorm = 177.6467, GNorm = 0.1127, lr_0 = 3.1729e-04
Loss = 2.0102e-02, PNorm = 177.6609, GNorm = 0.2492, lr_0 = 3.1707e-04
Loss = 1.6240e-02, PNorm = 177.6719, GNorm = 0.2513, lr_0 = 3.1686e-04
Loss = 1.5550e-02, PNorm = 177.6840, GNorm = 0.2223, lr_0 = 3.1664e-04
Loss = 1.3333e-02, PNorm = 177.6985, GNorm = 0.2672, lr_0 = 3.1642e-04
Loss = 1.8124e-02, PNorm = 177.7111, GNorm = 0.5458, lr_0 = 3.1621e-04
Validation mae = 0.121340
Epoch 16
Loss = 9.0593e-03, PNorm = 177.7226, GNorm = 0.7660, lr_0 = 3.1599e-04
Loss = 1.1686e-02, PNorm = 177.7316, GNorm = 0.3928, lr_0 = 3.1577e-04
Loss = 1.2257e-02, PNorm = 177.7417, GNorm = 0.2016, lr_0 = 3.1556e-04
Loss = 1.0025e-02, PNorm = 177.7525, GNorm = 0.3681, lr_0 = 3.1534e-04
Loss = 1.2346e-02, PNorm = 177.7617, GNorm = 0.3354, lr_0 = 3.1512e-04
Loss = 1.2680e-02, PNorm = 177.7712, GNorm = 0.5368, lr_0 = 3.1491e-04
Loss = 9.9284e-03, PNorm = 177.7804, GNorm = 0.3685, lr_0 = 3.1469e-04
Loss = 1.2560e-02, PNorm = 177.7923, GNorm = 0.3271, lr_0 = 3.1448e-04
Loss = 1.1066e-02, PNorm = 177.8049, GNorm = 0.3762, lr_0 = 3.1426e-04
Loss = 1.1557e-02, PNorm = 177.8176, GNorm = 0.1478, lr_0 = 3.1405e-04
Loss = 1.0923e-02, PNorm = 177.8273, GNorm = 0.2309, lr_0 = 3.1383e-04
Loss = 9.5310e-03, PNorm = 177.8364, GNorm = 0.3626, lr_0 = 3.1362e-04
Loss = 9.9391e-03, PNorm = 177.8458, GNorm = 0.1707, lr_0 = 3.1340e-04
Loss = 1.0575e-02, PNorm = 177.8540, GNorm = 0.1640, lr_0 = 3.1319e-04
Loss = 8.9579e-03, PNorm = 177.8642, GNorm = 0.3822, lr_0 = 3.1297e-04
Loss = 1.0072e-02, PNorm = 177.8743, GNorm = 0.2345, lr_0 = 3.1276e-04
Loss = 8.8742e-03, PNorm = 177.8842, GNorm = 0.1418, lr_0 = 3.1254e-04
Loss = 1.0691e-02, PNorm = 177.8940, GNorm = 0.1889, lr_0 = 3.1233e-04
Loss = 9.2689e-03, PNorm = 177.9042, GNorm = 0.1016, lr_0 = 3.1212e-04
Loss = 1.5710e-02, PNorm = 177.9136, GNorm = 0.2247, lr_0 = 3.1190e-04
Loss = 2.3154e-02, PNorm = 177.9250, GNorm = 0.4054, lr_0 = 3.1169e-04
Loss = 1.0914e-02, PNorm = 177.9348, GNorm = 0.4452, lr_0 = 3.1147e-04
Loss = 1.6765e-02, PNorm = 177.9472, GNorm = 0.2052, lr_0 = 3.1126e-04
Loss = 1.0583e-02, PNorm = 177.9564, GNorm = 0.1630, lr_0 = 3.1105e-04
Loss = 7.6903e-03, PNorm = 177.9644, GNorm = 0.2128, lr_0 = 3.1083e-04
Loss = 1.1895e-02, PNorm = 177.9722, GNorm = 0.3150, lr_0 = 3.1062e-04
Loss = 9.2012e-03, PNorm = 177.9855, GNorm = 0.1059, lr_0 = 3.1041e-04
Loss = 9.3606e-03, PNorm = 177.9984, GNorm = 0.1990, lr_0 = 3.1020e-04
Loss = 1.0302e-02, PNorm = 178.0093, GNorm = 0.3280, lr_0 = 3.0998e-04
Loss = 7.9150e-03, PNorm = 178.0185, GNorm = 0.2064, lr_0 = 3.0977e-04
Loss = 1.3192e-02, PNorm = 178.0290, GNorm = 0.3002, lr_0 = 3.0956e-04
Loss = 8.5808e-03, PNorm = 178.0393, GNorm = 0.5421, lr_0 = 3.0935e-04
Loss = 1.2469e-02, PNorm = 178.0492, GNorm = 0.1310, lr_0 = 3.0914e-04
Loss = 9.7035e-03, PNorm = 178.0576, GNorm = 0.2700, lr_0 = 3.0892e-04
Loss = 9.0400e-03, PNorm = 178.0671, GNorm = 0.1393, lr_0 = 3.0871e-04
Loss = 1.1528e-02, PNorm = 178.0762, GNorm = 0.2680, lr_0 = 3.0850e-04
Loss = 1.0255e-02, PNorm = 178.0876, GNorm = 0.2229, lr_0 = 3.0829e-04
Loss = 9.2415e-03, PNorm = 178.1014, GNorm = 0.3032, lr_0 = 3.0808e-04
Loss = 9.8460e-03, PNorm = 178.1146, GNorm = 0.2806, lr_0 = 3.0787e-04
Loss = 9.4867e-03, PNorm = 178.1287, GNorm = 0.2342, lr_0 = 3.0766e-04
Loss = 7.8405e-03, PNorm = 178.1399, GNorm = 0.1230, lr_0 = 3.0745e-04
Loss = 1.7109e-02, PNorm = 178.1481, GNorm = 0.2937, lr_0 = 3.0723e-04
Loss = 8.1892e-03, PNorm = 178.1554, GNorm = 0.2864, lr_0 = 3.0702e-04
Loss = 1.0357e-02, PNorm = 178.1640, GNorm = 0.2258, lr_0 = 3.0681e-04
Loss = 8.7402e-03, PNorm = 178.1738, GNorm = 0.1224, lr_0 = 3.0660e-04
Loss = 8.4460e-03, PNorm = 178.1851, GNorm = 0.1619, lr_0 = 3.0639e-04
Loss = 1.4079e-02, PNorm = 178.1971, GNorm = 0.4342, lr_0 = 3.0618e-04
Loss = 8.3180e-03, PNorm = 178.2070, GNorm = 0.1934, lr_0 = 3.0597e-04
Loss = 1.1715e-02, PNorm = 178.2179, GNorm = 0.2206, lr_0 = 3.0576e-04
Loss = 1.4437e-02, PNorm = 178.2341, GNorm = 0.2899, lr_0 = 3.0555e-04
Loss = 1.1782e-02, PNorm = 178.2477, GNorm = 0.1720, lr_0 = 3.0535e-04
Loss = 9.0311e-03, PNorm = 178.2597, GNorm = 0.1648, lr_0 = 3.0514e-04
Loss = 8.7984e-03, PNorm = 178.2697, GNorm = 0.2311, lr_0 = 3.0493e-04
Loss = 8.4993e-03, PNorm = 178.2774, GNorm = 0.1407, lr_0 = 3.0472e-04
Loss = 7.8308e-03, PNorm = 178.2868, GNorm = 0.2789, lr_0 = 3.0451e-04
Loss = 1.2251e-02, PNorm = 178.2953, GNorm = 0.5729, lr_0 = 3.0430e-04
Loss = 1.0737e-02, PNorm = 178.3070, GNorm = 0.2511, lr_0 = 3.0409e-04
Loss = 8.5061e-03, PNorm = 178.3176, GNorm = 0.2633, lr_0 = 3.0388e-04
Loss = 1.0682e-02, PNorm = 178.3262, GNorm = 0.6199, lr_0 = 3.0368e-04
Loss = 1.1828e-02, PNorm = 178.3395, GNorm = 0.2923, lr_0 = 3.0347e-04
Loss = 1.0487e-02, PNorm = 178.3501, GNorm = 0.2406, lr_0 = 3.0326e-04
Loss = 1.2880e-02, PNorm = 178.3592, GNorm = 0.2812, lr_0 = 3.0305e-04
Loss = 7.8523e-03, PNorm = 178.3676, GNorm = 0.1974, lr_0 = 3.0284e-04
Loss = 7.5326e-03, PNorm = 178.3756, GNorm = 0.3139, lr_0 = 3.0264e-04
Loss = 8.1194e-03, PNorm = 178.3869, GNorm = 0.2459, lr_0 = 3.0243e-04
Loss = 8.5237e-03, PNorm = 178.3971, GNorm = 0.3795, lr_0 = 3.0222e-04
Loss = 8.0688e-03, PNorm = 178.4090, GNorm = 0.1888, lr_0 = 3.0202e-04
Loss = 7.1658e-03, PNorm = 178.4202, GNorm = 0.3732, lr_0 = 3.0181e-04
Loss = 9.1877e-03, PNorm = 178.4303, GNorm = 0.1669, lr_0 = 3.0160e-04
Loss = 8.0404e-03, PNorm = 178.4404, GNorm = 0.5548, lr_0 = 3.0140e-04
Loss = 1.2880e-02, PNorm = 178.4543, GNorm = 0.2840, lr_0 = 3.0119e-04
Loss = 8.9819e-03, PNorm = 178.4690, GNorm = 0.4962, lr_0 = 3.0098e-04
Loss = 9.4459e-03, PNorm = 178.4802, GNorm = 0.4574, lr_0 = 3.0078e-04
Loss = 8.4500e-03, PNorm = 178.4883, GNorm = 0.1312, lr_0 = 3.0057e-04
Loss = 1.0789e-02, PNorm = 178.4957, GNorm = 0.2064, lr_0 = 3.0036e-04
Loss = 1.3475e-02, PNorm = 178.5036, GNorm = 0.2797, lr_0 = 3.0016e-04
Loss = 1.0234e-02, PNorm = 178.5123, GNorm = 0.2220, lr_0 = 2.9995e-04
Loss = 1.0832e-02, PNorm = 178.5235, GNorm = 0.2471, lr_0 = 2.9975e-04
Loss = 8.1296e-03, PNorm = 178.5350, GNorm = 0.1602, lr_0 = 2.9954e-04
Loss = 1.0581e-02, PNorm = 178.5452, GNorm = 0.1660, lr_0 = 2.9934e-04
Loss = 1.2137e-02, PNorm = 178.5552, GNorm = 0.1444, lr_0 = 2.9913e-04
Loss = 1.5007e-02, PNorm = 178.5632, GNorm = 0.3718, lr_0 = 2.9893e-04
Loss = 8.6691e-03, PNorm = 178.5754, GNorm = 0.4935, lr_0 = 2.9872e-04
Loss = 7.2349e-03, PNorm = 178.5851, GNorm = 0.2244, lr_0 = 2.9852e-04
Loss = 1.0361e-02, PNorm = 178.5968, GNorm = 0.4184, lr_0 = 2.9831e-04
Loss = 9.7853e-03, PNorm = 178.6090, GNorm = 0.1628, lr_0 = 2.9811e-04
Loss = 8.2730e-03, PNorm = 178.6209, GNorm = 0.2773, lr_0 = 2.9790e-04
Loss = 1.0447e-02, PNorm = 178.6294, GNorm = 0.5980, lr_0 = 2.9770e-04
Loss = 1.2466e-02, PNorm = 178.6406, GNorm = 0.2433, lr_0 = 2.9750e-04
Loss = 9.7891e-03, PNorm = 178.6480, GNorm = 0.2294, lr_0 = 2.9729e-04
Loss = 1.2540e-02, PNorm = 178.6595, GNorm = 0.1580, lr_0 = 2.9709e-04
Loss = 1.1079e-02, PNorm = 178.6732, GNorm = 0.3267, lr_0 = 2.9689e-04
Loss = 9.7334e-03, PNorm = 178.6852, GNorm = 0.2101, lr_0 = 2.9668e-04
Loss = 8.6723e-03, PNorm = 178.6993, GNorm = 0.1432, lr_0 = 2.9648e-04
Loss = 1.7785e-02, PNorm = 178.7101, GNorm = 0.1781, lr_0 = 2.9628e-04
Loss = 1.3879e-02, PNorm = 178.7212, GNorm = 0.3123, lr_0 = 2.9607e-04
Loss = 1.4926e-02, PNorm = 178.7327, GNorm = 0.4551, lr_0 = 2.9587e-04
Loss = 9.4239e-03, PNorm = 178.7483, GNorm = 0.1667, lr_0 = 2.9567e-04
Loss = 9.0939e-03, PNorm = 178.7612, GNorm = 0.1968, lr_0 = 2.9546e-04
Loss = 9.8000e-03, PNorm = 178.7725, GNorm = 0.2812, lr_0 = 2.9526e-04
Loss = 8.8724e-03, PNorm = 178.7865, GNorm = 0.2736, lr_0 = 2.9506e-04
Loss = 1.3845e-02, PNorm = 178.7987, GNorm = 1.7824, lr_0 = 2.9486e-04
Loss = 8.8806e-03, PNorm = 178.8060, GNorm = 0.2634, lr_0 = 2.9466e-04
Loss = 9.9295e-03, PNorm = 178.8183, GNorm = 0.2939, lr_0 = 2.9445e-04
Loss = 1.0311e-02, PNorm = 178.8328, GNorm = 0.1257, lr_0 = 2.9425e-04
Loss = 9.8077e-03, PNorm = 178.8478, GNorm = 0.4077, lr_0 = 2.9405e-04
Loss = 1.1995e-02, PNorm = 178.8580, GNorm = 0.3078, lr_0 = 2.9385e-04
Loss = 1.0040e-02, PNorm = 178.8708, GNorm = 0.1325, lr_0 = 2.9365e-04
Loss = 1.6671e-02, PNorm = 178.8829, GNorm = 1.4148, lr_0 = 2.9345e-04
Loss = 8.6732e-03, PNorm = 178.8955, GNorm = 0.1579, lr_0 = 2.9325e-04
Loss = 9.9676e-03, PNorm = 178.9086, GNorm = 0.1760, lr_0 = 2.9305e-04
Loss = 1.0159e-02, PNorm = 178.9208, GNorm = 0.2285, lr_0 = 2.9284e-04
Loss = 1.3848e-02, PNorm = 178.9333, GNorm = 0.7227, lr_0 = 2.9264e-04
Loss = 1.2352e-02, PNorm = 178.9449, GNorm = 0.4669, lr_0 = 2.9244e-04
Loss = 1.3126e-02, PNorm = 178.9565, GNorm = 0.3544, lr_0 = 2.9224e-04
Loss = 1.9412e-02, PNorm = 178.9650, GNorm = 0.3414, lr_0 = 2.9204e-04
Loss = 1.2656e-02, PNorm = 178.9776, GNorm = 0.1829, lr_0 = 2.9184e-04
Loss = 1.2604e-02, PNorm = 178.9909, GNorm = 0.2234, lr_0 = 2.9164e-04
Loss = 9.2774e-03, PNorm = 179.0048, GNorm = 0.3880, lr_0 = 2.9144e-04
Loss = 2.0880e-02, PNorm = 179.0196, GNorm = 0.2087, lr_0 = 2.9124e-04
Validation mae = 0.121269
Epoch 17
Loss = 1.2831e-02, PNorm = 179.0329, GNorm = 0.2388, lr_0 = 2.9104e-04
Loss = 9.3186e-03, PNorm = 179.0401, GNorm = 0.1579, lr_0 = 2.9084e-04
Loss = 1.5212e-02, PNorm = 179.0467, GNorm = 0.3383, lr_0 = 2.9065e-04
Loss = 7.9639e-03, PNorm = 179.0579, GNorm = 0.1759, lr_0 = 2.9045e-04
Loss = 9.3280e-03, PNorm = 179.0686, GNorm = 0.2644, lr_0 = 2.9025e-04
Loss = 8.5549e-03, PNorm = 179.0790, GNorm = 0.1424, lr_0 = 2.9005e-04
Loss = 7.8514e-03, PNorm = 179.0867, GNorm = 0.2231, lr_0 = 2.8985e-04
Loss = 9.7595e-03, PNorm = 179.0932, GNorm = 0.2063, lr_0 = 2.8965e-04
Loss = 1.3376e-02, PNorm = 179.1032, GNorm = 0.1788, lr_0 = 2.8945e-04
Loss = 1.0405e-02, PNorm = 179.1108, GNorm = 0.1923, lr_0 = 2.8925e-04
Loss = 8.2901e-03, PNorm = 179.1203, GNorm = 0.2605, lr_0 = 2.8906e-04
Loss = 9.5835e-03, PNorm = 179.1290, GNorm = 0.1346, lr_0 = 2.8886e-04
Loss = 7.7413e-03, PNorm = 179.1378, GNorm = 0.1059, lr_0 = 2.8866e-04
Loss = 8.1410e-03, PNorm = 179.1456, GNorm = 0.2174, lr_0 = 2.8846e-04
Loss = 8.2677e-03, PNorm = 179.1545, GNorm = 0.2100, lr_0 = 2.8826e-04
Loss = 8.4140e-03, PNorm = 179.1616, GNorm = 0.2545, lr_0 = 2.8807e-04
Loss = 8.0561e-03, PNorm = 179.1697, GNorm = 0.1408, lr_0 = 2.8787e-04
Loss = 9.5268e-03, PNorm = 179.1785, GNorm = 0.3484, lr_0 = 2.8767e-04
Loss = 7.3653e-03, PNorm = 179.1878, GNorm = 0.1116, lr_0 = 2.8748e-04
Loss = 8.1294e-03, PNorm = 179.1941, GNorm = 0.1109, lr_0 = 2.8728e-04
Loss = 1.0323e-02, PNorm = 179.2024, GNorm = 0.4157, lr_0 = 2.8708e-04
Loss = 8.8907e-03, PNorm = 179.2145, GNorm = 0.0921, lr_0 = 2.8689e-04
Loss = 7.8520e-03, PNorm = 179.2227, GNorm = 0.2005, lr_0 = 2.8669e-04
Loss = 8.3763e-03, PNorm = 179.2305, GNorm = 0.4172, lr_0 = 2.8649e-04
Loss = 8.6671e-03, PNorm = 179.2420, GNorm = 0.1891, lr_0 = 2.8630e-04
Loss = 8.9574e-03, PNorm = 179.2521, GNorm = 0.1350, lr_0 = 2.8610e-04
Loss = 7.8546e-03, PNorm = 179.2614, GNorm = 0.2355, lr_0 = 2.8590e-04
Loss = 9.5029e-03, PNorm = 179.2701, GNorm = 0.2574, lr_0 = 2.8571e-04
Loss = 9.4891e-03, PNorm = 179.2779, GNorm = 0.1376, lr_0 = 2.8551e-04
Loss = 6.4137e-03, PNorm = 179.2841, GNorm = 0.1806, lr_0 = 2.8532e-04
Loss = 9.0147e-03, PNorm = 179.2906, GNorm = 0.3350, lr_0 = 2.8512e-04
Loss = 1.0379e-02, PNorm = 179.2993, GNorm = 0.1317, lr_0 = 2.8493e-04
Loss = 9.9179e-03, PNorm = 179.3084, GNorm = 0.1595, lr_0 = 2.8473e-04
Loss = 1.0813e-02, PNorm = 179.3180, GNorm = 0.2419, lr_0 = 2.8454e-04
Loss = 7.5817e-03, PNorm = 179.3280, GNorm = 0.1024, lr_0 = 2.8434e-04
Loss = 8.1519e-03, PNorm = 179.3366, GNorm = 0.3426, lr_0 = 2.8415e-04
Loss = 8.6893e-03, PNorm = 179.3439, GNorm = 0.2192, lr_0 = 2.8395e-04
Loss = 1.1756e-02, PNorm = 179.3500, GNorm = 0.1940, lr_0 = 2.8376e-04
Loss = 7.0829e-03, PNorm = 179.3584, GNorm = 0.1503, lr_0 = 2.8356e-04
Loss = 7.0018e-03, PNorm = 179.3668, GNorm = 0.1389, lr_0 = 2.8337e-04
Loss = 1.1323e-02, PNorm = 179.3781, GNorm = 0.4365, lr_0 = 2.8317e-04
Loss = 1.4188e-02, PNorm = 179.3881, GNorm = 0.1721, lr_0 = 2.8298e-04
Loss = 7.6230e-03, PNorm = 179.3965, GNorm = 0.1484, lr_0 = 2.8279e-04
Loss = 1.1273e-02, PNorm = 179.4078, GNorm = 0.1930, lr_0 = 2.8259e-04
Loss = 9.5891e-03, PNorm = 179.4181, GNorm = 0.1202, lr_0 = 2.8240e-04
Loss = 1.2360e-02, PNorm = 179.4279, GNorm = 0.1545, lr_0 = 2.8221e-04
Loss = 7.3722e-03, PNorm = 179.4379, GNorm = 0.2326, lr_0 = 2.8201e-04
Loss = 1.1905e-02, PNorm = 179.4466, GNorm = 0.1564, lr_0 = 2.8182e-04
Loss = 7.5259e-03, PNorm = 179.4575, GNorm = 0.1502, lr_0 = 2.8163e-04
Loss = 7.2884e-03, PNorm = 179.4663, GNorm = 0.1452, lr_0 = 2.8143e-04
Loss = 1.0175e-02, PNorm = 179.4734, GNorm = 0.2688, lr_0 = 2.8124e-04
Loss = 9.4840e-03, PNorm = 179.4833, GNorm = 0.3150, lr_0 = 2.8105e-04
Loss = 7.7344e-03, PNorm = 179.4910, GNorm = 0.2901, lr_0 = 2.8085e-04
Loss = 1.8572e-02, PNorm = 179.4993, GNorm = 0.4041, lr_0 = 2.8066e-04
Loss = 1.1329e-02, PNorm = 179.5076, GNorm = 0.1811, lr_0 = 2.8047e-04
Loss = 7.6782e-03, PNorm = 179.5192, GNorm = 0.1574, lr_0 = 2.8028e-04
Loss = 9.7896e-03, PNorm = 179.5304, GNorm = 0.1855, lr_0 = 2.8009e-04
Loss = 7.6628e-03, PNorm = 179.5394, GNorm = 0.2366, lr_0 = 2.7989e-04
Loss = 8.8836e-03, PNorm = 179.5479, GNorm = 0.2933, lr_0 = 2.7970e-04
Loss = 6.8338e-03, PNorm = 179.5561, GNorm = 0.1658, lr_0 = 2.7951e-04
Loss = 6.2870e-03, PNorm = 179.5674, GNorm = 0.5298, lr_0 = 2.7932e-04
Loss = 1.0570e-02, PNorm = 179.5798, GNorm = 0.1090, lr_0 = 2.7913e-04
Loss = 1.0116e-02, PNorm = 179.5878, GNorm = 0.3144, lr_0 = 2.7894e-04
Loss = 1.0012e-02, PNorm = 179.5961, GNorm = 0.3301, lr_0 = 2.7875e-04
Loss = 8.4744e-03, PNorm = 179.6048, GNorm = 0.4402, lr_0 = 2.7855e-04
Loss = 6.1413e-03, PNorm = 179.6095, GNorm = 0.1346, lr_0 = 2.7836e-04
Loss = 8.0955e-03, PNorm = 179.6187, GNorm = 0.1037, lr_0 = 2.7817e-04
Loss = 7.2660e-03, PNorm = 179.6277, GNorm = 0.1272, lr_0 = 2.7798e-04
Loss = 8.3964e-03, PNorm = 179.6320, GNorm = 0.1105, lr_0 = 2.7779e-04
Loss = 7.7081e-03, PNorm = 179.6379, GNorm = 0.2057, lr_0 = 2.7760e-04
Loss = 8.4049e-03, PNorm = 179.6437, GNorm = 0.1998, lr_0 = 2.7741e-04
Loss = 1.1802e-02, PNorm = 179.6556, GNorm = 0.1370, lr_0 = 2.7722e-04
Loss = 7.0468e-03, PNorm = 179.6686, GNorm = 0.1250, lr_0 = 2.7703e-04
Loss = 1.3678e-02, PNorm = 179.6786, GNorm = 0.3927, lr_0 = 2.7684e-04
Loss = 7.6617e-03, PNorm = 179.6890, GNorm = 0.1079, lr_0 = 2.7665e-04
Loss = 1.4957e-02, PNorm = 179.6987, GNorm = 0.1585, lr_0 = 2.7646e-04
Loss = 7.8474e-03, PNorm = 179.7091, GNorm = 0.1446, lr_0 = 2.7627e-04
Loss = 7.0056e-03, PNorm = 179.7184, GNorm = 0.3539, lr_0 = 2.7608e-04
Loss = 8.4107e-03, PNorm = 179.7248, GNorm = 0.2489, lr_0 = 2.7590e-04
Loss = 1.2137e-02, PNorm = 179.7355, GNorm = 0.2113, lr_0 = 2.7571e-04
Loss = 7.2500e-03, PNorm = 179.7480, GNorm = 0.2025, lr_0 = 2.7552e-04
Loss = 1.2896e-02, PNorm = 179.7562, GNorm = 0.2441, lr_0 = 2.7533e-04
Loss = 1.0425e-02, PNorm = 179.7654, GNorm = 0.2414, lr_0 = 2.7514e-04
Loss = 6.0364e-03, PNorm = 179.7742, GNorm = 0.1096, lr_0 = 2.7495e-04
Loss = 8.5692e-03, PNorm = 179.7803, GNorm = 0.3119, lr_0 = 2.7476e-04
Loss = 8.4353e-03, PNorm = 179.7896, GNorm = 0.1328, lr_0 = 2.7457e-04
Loss = 9.7597e-03, PNorm = 179.8027, GNorm = 0.3638, lr_0 = 2.7439e-04
Loss = 7.3904e-03, PNorm = 179.8152, GNorm = 0.2040, lr_0 = 2.7420e-04
Loss = 1.1259e-02, PNorm = 179.8257, GNorm = 0.2206, lr_0 = 2.7401e-04
Loss = 9.8121e-03, PNorm = 179.8340, GNorm = 0.1979, lr_0 = 2.7382e-04
Loss = 9.9291e-03, PNorm = 179.8430, GNorm = 0.2563, lr_0 = 2.7364e-04
Loss = 1.2967e-02, PNorm = 179.8531, GNorm = 0.2772, lr_0 = 2.7345e-04
Loss = 8.5971e-03, PNorm = 179.8601, GNorm = 0.7192, lr_0 = 2.7326e-04
Loss = 1.0821e-02, PNorm = 179.8650, GNorm = 0.2507, lr_0 = 2.7307e-04
Loss = 8.0929e-03, PNorm = 179.8739, GNorm = 0.2827, lr_0 = 2.7289e-04
Loss = 2.4394e-02, PNorm = 179.8847, GNorm = 0.2378, lr_0 = 2.7270e-04
Loss = 7.9383e-03, PNorm = 179.8967, GNorm = 0.2155, lr_0 = 2.7251e-04
Loss = 7.2404e-03, PNorm = 179.9062, GNorm = 0.1399, lr_0 = 2.7233e-04
Loss = 7.9628e-03, PNorm = 179.9145, GNorm = 0.1570, lr_0 = 2.7214e-04
Loss = 8.2244e-03, PNorm = 179.9235, GNorm = 0.1958, lr_0 = 2.7195e-04
Loss = 7.8761e-03, PNorm = 179.9346, GNorm = 0.2951, lr_0 = 2.7177e-04
Loss = 9.8239e-03, PNorm = 179.9443, GNorm = 0.6570, lr_0 = 2.7158e-04
Loss = 1.0119e-02, PNorm = 179.9546, GNorm = 0.3356, lr_0 = 2.7139e-04
Loss = 1.0004e-02, PNorm = 179.9652, GNorm = 0.1966, lr_0 = 2.7121e-04
Loss = 8.7322e-03, PNorm = 179.9762, GNorm = 0.4249, lr_0 = 2.7102e-04
Loss = 9.6904e-03, PNorm = 179.9869, GNorm = 0.0969, lr_0 = 2.7084e-04
Loss = 9.4561e-03, PNorm = 179.9971, GNorm = 0.2626, lr_0 = 2.7065e-04
Loss = 1.0262e-02, PNorm = 180.0070, GNorm = 0.1538, lr_0 = 2.7047e-04
Loss = 9.0062e-03, PNorm = 180.0159, GNorm = 0.2695, lr_0 = 2.7028e-04
Loss = 9.6871e-03, PNorm = 180.0277, GNorm = 0.2232, lr_0 = 2.7010e-04
Loss = 6.9672e-03, PNorm = 180.0365, GNorm = 0.2749, lr_0 = 2.6991e-04
Loss = 9.7687e-03, PNorm = 180.0454, GNorm = 0.1042, lr_0 = 2.6973e-04
Loss = 8.8332e-03, PNorm = 180.0559, GNorm = 0.2217, lr_0 = 2.6954e-04
Loss = 9.6387e-03, PNorm = 180.0615, GNorm = 0.2081, lr_0 = 2.6936e-04
Loss = 9.4192e-03, PNorm = 180.0701, GNorm = 0.3539, lr_0 = 2.6917e-04
Loss = 8.0906e-03, PNorm = 180.0808, GNorm = 0.1104, lr_0 = 2.6899e-04
Loss = 8.8004e-03, PNorm = 180.0904, GNorm = 0.4700, lr_0 = 2.6880e-04
Loss = 6.5696e-03, PNorm = 180.0989, GNorm = 0.1377, lr_0 = 2.6862e-04
Loss = 7.4391e-03, PNorm = 180.1065, GNorm = 0.1439, lr_0 = 2.6844e-04
Loss = 1.2139e-02, PNorm = 180.1160, GNorm = 0.2604, lr_0 = 2.6825e-04
Validation mae = 0.121202
Epoch 18
Loss = 9.9801e-03, PNorm = 180.1257, GNorm = 0.3676, lr_0 = 2.6807e-04
Loss = 8.1094e-03, PNorm = 180.1330, GNorm = 0.1859, lr_0 = 2.6788e-04
Loss = 8.4495e-03, PNorm = 180.1402, GNorm = 0.1592, lr_0 = 2.6770e-04
Loss = 7.1781e-03, PNorm = 180.1473, GNorm = 0.1603, lr_0 = 2.6752e-04
Loss = 6.1286e-03, PNorm = 180.1561, GNorm = 0.3840, lr_0 = 2.6733e-04
Loss = 9.8612e-03, PNorm = 180.1633, GNorm = 0.1141, lr_0 = 2.6715e-04
Loss = 6.4278e-03, PNorm = 180.1724, GNorm = 0.1568, lr_0 = 2.6697e-04
Loss = 7.9616e-03, PNorm = 180.1780, GNorm = 0.1773, lr_0 = 2.6678e-04
Loss = 7.4684e-03, PNorm = 180.1856, GNorm = 0.1342, lr_0 = 2.6660e-04
Loss = 6.4557e-03, PNorm = 180.1936, GNorm = 0.1418, lr_0 = 2.6642e-04
Loss = 5.7256e-03, PNorm = 180.2006, GNorm = 0.1184, lr_0 = 2.6624e-04
Loss = 8.2534e-03, PNorm = 180.2095, GNorm = 0.1954, lr_0 = 2.6605e-04
Loss = 1.0199e-02, PNorm = 180.2164, GNorm = 0.1007, lr_0 = 2.6587e-04
Loss = 1.1044e-02, PNorm = 180.2242, GNorm = 0.5379, lr_0 = 2.6569e-04
Loss = 6.3090e-03, PNorm = 180.2290, GNorm = 0.1670, lr_0 = 2.6551e-04
Loss = 6.9633e-03, PNorm = 180.2362, GNorm = 0.1284, lr_0 = 2.6533e-04
Loss = 6.8348e-03, PNorm = 180.2428, GNorm = 0.1557, lr_0 = 2.6514e-04
Loss = 1.1138e-02, PNorm = 180.2512, GNorm = 0.3495, lr_0 = 2.6496e-04
Loss = 6.0949e-03, PNorm = 180.2586, GNorm = 0.2832, lr_0 = 2.6478e-04
Loss = 9.0611e-03, PNorm = 180.2644, GNorm = 0.1369, lr_0 = 2.6460e-04
Loss = 7.3794e-03, PNorm = 180.2703, GNorm = 0.1788, lr_0 = 2.6442e-04
Loss = 7.1518e-03, PNorm = 180.2760, GNorm = 0.2742, lr_0 = 2.6424e-04
Loss = 7.7219e-03, PNorm = 180.2850, GNorm = 0.4806, lr_0 = 2.6406e-04
Loss = 6.3805e-03, PNorm = 180.2930, GNorm = 0.2154, lr_0 = 2.6388e-04
Loss = 6.4467e-03, PNorm = 180.2981, GNorm = 0.2554, lr_0 = 2.6369e-04
Loss = 6.0660e-03, PNorm = 180.3039, GNorm = 0.2845, lr_0 = 2.6351e-04
Loss = 6.6779e-03, PNorm = 180.3082, GNorm = 0.2942, lr_0 = 2.6333e-04
Loss = 7.9309e-03, PNorm = 180.3142, GNorm = 0.4505, lr_0 = 2.6315e-04
Loss = 6.6963e-03, PNorm = 180.3193, GNorm = 0.2355, lr_0 = 2.6297e-04
Loss = 7.2105e-03, PNorm = 180.3260, GNorm = 0.3545, lr_0 = 2.6279e-04
Loss = 6.7346e-03, PNorm = 180.3295, GNorm = 0.1287, lr_0 = 2.6261e-04
Loss = 8.0346e-03, PNorm = 180.3340, GNorm = 0.2387, lr_0 = 2.6243e-04
Loss = 9.3695e-03, PNorm = 180.3408, GNorm = 0.1651, lr_0 = 2.6225e-04
Loss = 4.9324e-03, PNorm = 180.3479, GNorm = 0.1413, lr_0 = 2.6207e-04
Loss = 1.5795e-02, PNorm = 180.3557, GNorm = 0.1871, lr_0 = 2.6189e-04
Loss = 9.7945e-03, PNorm = 180.3640, GNorm = 0.1580, lr_0 = 2.6171e-04
Loss = 8.2243e-03, PNorm = 180.3714, GNorm = 0.1889, lr_0 = 2.6153e-04
Loss = 8.0698e-03, PNorm = 180.3800, GNorm = 0.2045, lr_0 = 2.6136e-04
Loss = 7.5384e-03, PNorm = 180.3854, GNorm = 0.2075, lr_0 = 2.6118e-04
Loss = 9.4603e-03, PNorm = 180.3927, GNorm = 0.0954, lr_0 = 2.6100e-04
Loss = 8.1246e-03, PNorm = 180.3987, GNorm = 0.7528, lr_0 = 2.6082e-04
Loss = 5.8329e-03, PNorm = 180.4018, GNorm = 0.1248, lr_0 = 2.6064e-04
Loss = 6.4478e-03, PNorm = 180.4084, GNorm = 0.2144, lr_0 = 2.6046e-04
Loss = 7.5038e-03, PNorm = 180.4144, GNorm = 0.1951, lr_0 = 2.6028e-04
Loss = 1.1317e-02, PNorm = 180.4224, GNorm = 0.1968, lr_0 = 2.6011e-04
Loss = 6.5493e-03, PNorm = 180.4309, GNorm = 0.2047, lr_0 = 2.5993e-04
Loss = 5.1725e-03, PNorm = 180.4384, GNorm = 0.1116, lr_0 = 2.5975e-04
Loss = 7.2462e-03, PNorm = 180.4482, GNorm = 0.2011, lr_0 = 2.5957e-04
Loss = 1.1628e-02, PNorm = 180.4574, GNorm = 0.2083, lr_0 = 2.5939e-04
Loss = 1.2309e-02, PNorm = 180.4686, GNorm = 0.5782, lr_0 = 2.5922e-04
Loss = 1.7656e-02, PNorm = 180.4754, GNorm = 0.2216, lr_0 = 2.5904e-04
Loss = 8.7689e-03, PNorm = 180.4868, GNorm = 0.2968, lr_0 = 2.5886e-04
Loss = 9.6728e-03, PNorm = 180.4972, GNorm = 0.4572, lr_0 = 2.5868e-04
Loss = 1.1762e-02, PNorm = 180.5055, GNorm = 0.2794, lr_0 = 2.5851e-04
Loss = 7.7574e-03, PNorm = 180.5121, GNorm = 0.1215, lr_0 = 2.5833e-04
Loss = 9.9718e-03, PNorm = 180.5191, GNorm = 0.1796, lr_0 = 2.5815e-04
Loss = 6.8923e-03, PNorm = 180.5262, GNorm = 0.1690, lr_0 = 2.5797e-04
Loss = 6.5453e-03, PNorm = 180.5367, GNorm = 0.2022, lr_0 = 2.5780e-04
Loss = 6.4382e-03, PNorm = 180.5455, GNorm = 0.1937, lr_0 = 2.5762e-04
Loss = 6.3600e-03, PNorm = 180.5533, GNorm = 0.1510, lr_0 = 2.5745e-04
Loss = 1.0158e-02, PNorm = 180.5618, GNorm = 0.2026, lr_0 = 2.5727e-04
Loss = 6.4212e-03, PNorm = 180.5667, GNorm = 0.1218, lr_0 = 2.5709e-04
Loss = 5.8128e-03, PNorm = 180.5755, GNorm = 0.2664, lr_0 = 2.5692e-04
Loss = 7.1810e-03, PNorm = 180.5845, GNorm = 0.1880, lr_0 = 2.5674e-04
Loss = 6.3938e-03, PNorm = 180.5937, GNorm = 0.1583, lr_0 = 2.5656e-04
Loss = 1.0382e-02, PNorm = 180.6016, GNorm = 0.1454, lr_0 = 2.5639e-04
Loss = 5.0424e-03, PNorm = 180.6090, GNorm = 0.2319, lr_0 = 2.5621e-04
Loss = 8.2429e-03, PNorm = 180.6155, GNorm = 0.2842, lr_0 = 2.5604e-04
Loss = 9.3245e-03, PNorm = 180.6225, GNorm = 0.7000, lr_0 = 2.5586e-04
Loss = 7.1283e-03, PNorm = 180.6290, GNorm = 0.1407, lr_0 = 2.5569e-04
Loss = 4.5308e-03, PNorm = 180.6361, GNorm = 0.0982, lr_0 = 2.5551e-04
Loss = 6.5880e-03, PNorm = 180.6422, GNorm = 0.3266, lr_0 = 2.5534e-04
Loss = 6.4373e-03, PNorm = 180.6510, GNorm = 0.1591, lr_0 = 2.5516e-04
Loss = 7.7242e-03, PNorm = 180.6568, GNorm = 0.2203, lr_0 = 2.5499e-04
Loss = 5.7635e-03, PNorm = 180.6643, GNorm = 0.1063, lr_0 = 2.5481e-04
Loss = 1.3494e-02, PNorm = 180.6687, GNorm = 0.3105, lr_0 = 2.5464e-04
Loss = 8.1647e-03, PNorm = 180.6743, GNorm = 0.3634, lr_0 = 2.5446e-04
Loss = 5.9820e-03, PNorm = 180.6813, GNorm = 0.1574, lr_0 = 2.5429e-04
Loss = 4.9401e-03, PNorm = 180.6880, GNorm = 0.2439, lr_0 = 2.5411e-04
Loss = 4.9984e-03, PNorm = 180.6961, GNorm = 0.1246, lr_0 = 2.5394e-04
Loss = 5.4264e-03, PNorm = 180.7035, GNorm = 0.1589, lr_0 = 2.5377e-04
Loss = 5.6837e-03, PNorm = 180.7105, GNorm = 0.1524, lr_0 = 2.5359e-04
Loss = 7.7053e-03, PNorm = 180.7219, GNorm = 0.2460, lr_0 = 2.5342e-04
Loss = 5.9301e-03, PNorm = 180.7295, GNorm = 0.3111, lr_0 = 2.5325e-04
Loss = 9.0427e-03, PNorm = 180.7371, GNorm = 0.3661, lr_0 = 2.5307e-04
Loss = 1.0834e-02, PNorm = 180.7453, GNorm = 0.2307, lr_0 = 2.5290e-04
Loss = 6.5203e-03, PNorm = 180.7525, GNorm = 0.1525, lr_0 = 2.5273e-04
Loss = 5.8778e-03, PNorm = 180.7606, GNorm = 0.2000, lr_0 = 2.5255e-04
Loss = 8.8140e-03, PNorm = 180.7682, GNorm = 0.1888, lr_0 = 2.5238e-04
Loss = 6.6796e-03, PNorm = 180.7752, GNorm = 0.2311, lr_0 = 2.5221e-04
Loss = 1.6787e-02, PNorm = 180.7811, GNorm = 0.4968, lr_0 = 2.5203e-04
Loss = 6.6348e-03, PNorm = 180.7899, GNorm = 0.1732, lr_0 = 2.5186e-04
Loss = 7.2040e-03, PNorm = 180.7993, GNorm = 0.1516, lr_0 = 2.5169e-04
Loss = 6.8069e-03, PNorm = 180.8067, GNorm = 0.2377, lr_0 = 2.5152e-04
Loss = 5.4042e-03, PNorm = 180.8151, GNorm = 0.1265, lr_0 = 2.5134e-04
Loss = 6.3485e-03, PNorm = 180.8217, GNorm = 0.2304, lr_0 = 2.5117e-04
Loss = 1.2503e-02, PNorm = 180.8271, GNorm = 0.2035, lr_0 = 2.5100e-04
Loss = 6.4000e-03, PNorm = 180.8348, GNorm = 0.2947, lr_0 = 2.5083e-04
Loss = 6.3260e-03, PNorm = 180.8445, GNorm = 0.2950, lr_0 = 2.5066e-04
Loss = 1.2174e-02, PNorm = 180.8525, GNorm = 0.1466, lr_0 = 2.5048e-04
Loss = 7.4085e-03, PNorm = 180.8615, GNorm = 0.1527, lr_0 = 2.5031e-04
Loss = 9.7747e-03, PNorm = 180.8719, GNorm = 0.1260, lr_0 = 2.5014e-04
Loss = 8.6452e-03, PNorm = 180.8810, GNorm = 0.1754, lr_0 = 2.4997e-04
Loss = 9.0642e-03, PNorm = 180.8921, GNorm = 0.1477, lr_0 = 2.4980e-04
Loss = 1.0541e-02, PNorm = 180.9012, GNorm = 0.1636, lr_0 = 2.4963e-04
Loss = 1.5077e-02, PNorm = 180.9104, GNorm = 0.5243, lr_0 = 2.4946e-04
Loss = 9.1094e-03, PNorm = 180.9215, GNorm = 0.0956, lr_0 = 2.4929e-04
Loss = 1.2501e-02, PNorm = 180.9311, GNorm = 0.1285, lr_0 = 2.4911e-04
Loss = 1.2363e-02, PNorm = 180.9383, GNorm = 0.7983, lr_0 = 2.4894e-04
Loss = 7.0549e-03, PNorm = 180.9471, GNorm = 0.2858, lr_0 = 2.4877e-04
Loss = 1.4621e-02, PNorm = 180.9607, GNorm = 0.2732, lr_0 = 2.4860e-04
Loss = 1.4285e-02, PNorm = 180.9699, GNorm = 0.3764, lr_0 = 2.4843e-04
Loss = 8.5747e-03, PNorm = 180.9798, GNorm = 0.1479, lr_0 = 2.4826e-04
Loss = 9.3938e-03, PNorm = 180.9879, GNorm = 0.2147, lr_0 = 2.4809e-04
Loss = 7.4640e-03, PNorm = 180.9941, GNorm = 0.1874, lr_0 = 2.4792e-04
Loss = 6.5870e-03, PNorm = 181.0029, GNorm = 0.3273, lr_0 = 2.4775e-04
Loss = 8.2968e-03, PNorm = 181.0095, GNorm = 0.2853, lr_0 = 2.4758e-04
Loss = 8.6509e-03, PNorm = 181.0152, GNorm = 0.1665, lr_0 = 2.4741e-04
Loss = 7.6279e-03, PNorm = 181.0210, GNorm = 0.4642, lr_0 = 2.4724e-04
Loss = 5.5077e-03, PNorm = 181.0275, GNorm = 0.0921, lr_0 = 2.4707e-04
Validation mae = 0.121048
Epoch 19
Loss = 6.3723e-03, PNorm = 181.0354, GNorm = 0.1506, lr_0 = 2.4690e-04
Loss = 6.5126e-03, PNorm = 181.0414, GNorm = 0.2365, lr_0 = 2.4674e-04
Loss = 6.7337e-03, PNorm = 181.0475, GNorm = 0.1533, lr_0 = 2.4657e-04
Loss = 6.5583e-03, PNorm = 181.0534, GNorm = 0.1383, lr_0 = 2.4640e-04
Loss = 5.5925e-03, PNorm = 181.0589, GNorm = 0.1078, lr_0 = 2.4623e-04
Loss = 7.9734e-03, PNorm = 181.0663, GNorm = 0.2324, lr_0 = 2.4606e-04
Loss = 8.2908e-03, PNorm = 181.0717, GNorm = 0.4523, lr_0 = 2.4589e-04
Loss = 1.1451e-02, PNorm = 181.0776, GNorm = 0.1406, lr_0 = 2.4572e-04
Loss = 7.5398e-03, PNorm = 181.0825, GNorm = 0.1272, lr_0 = 2.4556e-04
Loss = 6.3250e-03, PNorm = 181.0885, GNorm = 0.1556, lr_0 = 2.4539e-04
Loss = 6.0473e-03, PNorm = 181.0950, GNorm = 0.1712, lr_0 = 2.4522e-04
Loss = 8.6259e-03, PNorm = 181.0993, GNorm = 0.2290, lr_0 = 2.4505e-04
Loss = 1.1852e-02, PNorm = 181.1054, GNorm = 0.8394, lr_0 = 2.4488e-04
Loss = 6.8083e-03, PNorm = 181.1103, GNorm = 0.1074, lr_0 = 2.4472e-04
Loss = 8.3727e-03, PNorm = 181.1158, GNorm = 0.1591, lr_0 = 2.4455e-04
Loss = 5.8736e-03, PNorm = 181.1226, GNorm = 0.0893, lr_0 = 2.4438e-04
Loss = 7.2381e-03, PNorm = 181.1281, GNorm = 0.4085, lr_0 = 2.4421e-04
Loss = 6.7269e-03, PNorm = 181.1354, GNorm = 0.1223, lr_0 = 2.4405e-04
Loss = 6.8669e-03, PNorm = 181.1407, GNorm = 0.2226, lr_0 = 2.4388e-04
Loss = 6.5738e-03, PNorm = 181.1454, GNorm = 0.8513, lr_0 = 2.4371e-04
Loss = 9.0271e-03, PNorm = 181.1476, GNorm = 0.3118, lr_0 = 2.4354e-04
Loss = 7.4385e-03, PNorm = 181.1539, GNorm = 0.3107, lr_0 = 2.4338e-04
Loss = 9.5834e-03, PNorm = 181.1592, GNorm = 0.2757, lr_0 = 2.4321e-04
Loss = 6.3596e-03, PNorm = 181.1668, GNorm = 0.1358, lr_0 = 2.4304e-04
Loss = 6.9885e-03, PNorm = 181.1731, GNorm = 0.2903, lr_0 = 2.4288e-04
Loss = 9.0355e-03, PNorm = 181.1788, GNorm = 0.1884, lr_0 = 2.4271e-04
Loss = 5.1387e-03, PNorm = 181.1842, GNorm = 0.1094, lr_0 = 2.4254e-04
Loss = 1.9221e-02, PNorm = 181.1922, GNorm = 0.2264, lr_0 = 2.4238e-04
Loss = 7.1913e-03, PNorm = 181.1981, GNorm = 0.1643, lr_0 = 2.4221e-04
Loss = 6.3953e-03, PNorm = 181.2062, GNorm = 0.3158, lr_0 = 2.4205e-04
Loss = 4.4102e-03, PNorm = 181.2128, GNorm = 0.1350, lr_0 = 2.4188e-04
Loss = 6.5127e-03, PNorm = 181.2159, GNorm = 0.2424, lr_0 = 2.4171e-04
Loss = 6.1374e-03, PNorm = 181.2209, GNorm = 0.1356, lr_0 = 2.4155e-04
Loss = 5.3505e-03, PNorm = 181.2271, GNorm = 0.1772, lr_0 = 2.4138e-04
Loss = 6.2059e-03, PNorm = 181.2331, GNorm = 0.1518, lr_0 = 2.4122e-04
Loss = 6.0064e-03, PNorm = 181.2392, GNorm = 0.1452, lr_0 = 2.4105e-04
Loss = 6.5651e-03, PNorm = 181.2456, GNorm = 0.1303, lr_0 = 2.4089e-04
Loss = 7.4023e-03, PNorm = 181.2499, GNorm = 0.2627, lr_0 = 2.4072e-04
Loss = 4.0966e-03, PNorm = 181.2544, GNorm = 0.1275, lr_0 = 2.4056e-04
Loss = 6.5249e-03, PNorm = 181.2620, GNorm = 0.3977, lr_0 = 2.4039e-04
Loss = 7.3761e-03, PNorm = 181.2668, GNorm = 0.1682, lr_0 = 2.4023e-04
Loss = 5.6347e-03, PNorm = 181.2718, GNorm = 0.2027, lr_0 = 2.4006e-04
Loss = 6.2099e-03, PNorm = 181.2792, GNorm = 0.0920, lr_0 = 2.3990e-04
Loss = 4.4412e-03, PNorm = 181.2835, GNorm = 0.1575, lr_0 = 2.3974e-04
Loss = 1.0548e-02, PNorm = 181.2881, GNorm = 0.7338, lr_0 = 2.3957e-04
Loss = 5.9621e-03, PNorm = 181.2950, GNorm = 0.2024, lr_0 = 2.3941e-04
Loss = 6.0155e-03, PNorm = 181.3012, GNorm = 0.2640, lr_0 = 2.3924e-04
Loss = 7.4871e-03, PNorm = 181.3095, GNorm = 0.1070, lr_0 = 2.3908e-04
Loss = 6.4021e-03, PNorm = 181.3158, GNorm = 0.1946, lr_0 = 2.3892e-04
Loss = 4.3576e-03, PNorm = 181.3213, GNorm = 0.1588, lr_0 = 2.3875e-04
Loss = 8.4171e-03, PNorm = 181.3276, GNorm = 0.1117, lr_0 = 2.3859e-04
Loss = 4.3368e-03, PNorm = 181.3326, GNorm = 0.2232, lr_0 = 2.3842e-04
Loss = 4.9051e-03, PNorm = 181.3380, GNorm = 0.4845, lr_0 = 2.3826e-04
Loss = 5.1385e-03, PNorm = 181.3446, GNorm = 0.1104, lr_0 = 2.3810e-04
Loss = 4.2496e-03, PNorm = 181.3497, GNorm = 0.4275, lr_0 = 2.3794e-04
Loss = 4.4767e-03, PNorm = 181.3548, GNorm = 0.1087, lr_0 = 2.3777e-04
Loss = 6.6772e-03, PNorm = 181.3601, GNorm = 0.3197, lr_0 = 2.3761e-04
Loss = 7.6078e-03, PNorm = 181.3653, GNorm = 0.3906, lr_0 = 2.3745e-04
Loss = 1.3104e-02, PNorm = 181.3687, GNorm = 0.1818, lr_0 = 2.3728e-04
Loss = 6.9973e-03, PNorm = 181.3739, GNorm = 0.1092, lr_0 = 2.3712e-04
Loss = 8.9508e-03, PNorm = 181.3801, GNorm = 0.1318, lr_0 = 2.3696e-04
Loss = 1.3204e-02, PNorm = 181.3874, GNorm = 0.2446, lr_0 = 2.3680e-04
Loss = 9.1659e-03, PNorm = 181.3939, GNorm = 0.1352, lr_0 = 2.3663e-04
Loss = 5.7019e-03, PNorm = 181.3990, GNorm = 0.1014, lr_0 = 2.3647e-04
Loss = 4.4913e-03, PNorm = 181.4046, GNorm = 0.1406, lr_0 = 2.3631e-04
Loss = 8.5447e-03, PNorm = 181.4119, GNorm = 0.2483, lr_0 = 2.3615e-04
Loss = 1.3250e-02, PNorm = 181.4217, GNorm = 0.1776, lr_0 = 2.3599e-04
Loss = 5.7670e-03, PNorm = 181.4290, GNorm = 0.1370, lr_0 = 2.3582e-04
Loss = 1.0003e-02, PNorm = 181.4352, GNorm = 0.1611, lr_0 = 2.3566e-04
Loss = 7.9934e-03, PNorm = 181.4400, GNorm = 0.1925, lr_0 = 2.3550e-04
Loss = 5.2329e-03, PNorm = 181.4470, GNorm = 0.1122, lr_0 = 2.3534e-04
Loss = 4.5061e-03, PNorm = 181.4535, GNorm = 0.0996, lr_0 = 2.3518e-04
Loss = 5.2965e-03, PNorm = 181.4588, GNorm = 0.3344, lr_0 = 2.3502e-04
Loss = 6.5257e-03, PNorm = 181.4653, GNorm = 0.1737, lr_0 = 2.3486e-04
Loss = 5.3657e-03, PNorm = 181.4726, GNorm = 0.1862, lr_0 = 2.3470e-04
Loss = 6.5832e-03, PNorm = 181.4812, GNorm = 0.3608, lr_0 = 2.3454e-04
Loss = 7.8507e-03, PNorm = 181.4909, GNorm = 0.4431, lr_0 = 2.3437e-04
Loss = 9.2955e-03, PNorm = 181.4974, GNorm = 0.3238, lr_0 = 2.3421e-04
Loss = 1.0250e-02, PNorm = 181.5064, GNorm = 0.1850, lr_0 = 2.3405e-04
Loss = 6.3911e-03, PNorm = 181.5157, GNorm = 0.2881, lr_0 = 2.3389e-04
Loss = 6.4114e-03, PNorm = 181.5253, GNorm = 0.2930, lr_0 = 2.3373e-04
Loss = 4.8956e-03, PNorm = 181.5338, GNorm = 0.0914, lr_0 = 2.3357e-04
Loss = 9.3916e-03, PNorm = 181.5428, GNorm = 0.1388, lr_0 = 2.3341e-04
Loss = 5.7675e-03, PNorm = 181.5468, GNorm = 0.2294, lr_0 = 2.3325e-04
Loss = 5.8175e-03, PNorm = 181.5507, GNorm = 0.2277, lr_0 = 2.3309e-04
Loss = 5.9098e-03, PNorm = 181.5547, GNorm = 0.2904, lr_0 = 2.3293e-04
Loss = 5.9282e-03, PNorm = 181.5576, GNorm = 0.0811, lr_0 = 2.3277e-04
Loss = 5.4229e-03, PNorm = 181.5630, GNorm = 0.1439, lr_0 = 2.3261e-04
Loss = 5.2123e-03, PNorm = 181.5702, GNorm = 0.1962, lr_0 = 2.3246e-04
Loss = 4.8518e-03, PNorm = 181.5776, GNorm = 0.1306, lr_0 = 2.3230e-04
Loss = 4.7610e-03, PNorm = 181.5843, GNorm = 0.2352, lr_0 = 2.3214e-04
Loss = 7.6494e-03, PNorm = 181.5921, GNorm = 0.0793, lr_0 = 2.3198e-04
Loss = 7.0158e-03, PNorm = 181.5997, GNorm = 0.1306, lr_0 = 2.3182e-04
Loss = 7.3096e-03, PNorm = 181.6061, GNorm = 0.1109, lr_0 = 2.3166e-04
Loss = 4.4323e-03, PNorm = 181.6120, GNorm = 0.1595, lr_0 = 2.3150e-04
Loss = 6.2994e-03, PNorm = 181.6170, GNorm = 0.1942, lr_0 = 2.3134e-04
Loss = 6.1467e-03, PNorm = 181.6262, GNorm = 0.2316, lr_0 = 2.3118e-04
Loss = 8.6786e-03, PNorm = 181.6352, GNorm = 0.1996, lr_0 = 2.3103e-04
Loss = 9.2398e-03, PNorm = 181.6416, GNorm = 0.3775, lr_0 = 2.3087e-04
Loss = 5.4423e-03, PNorm = 181.6460, GNorm = 0.2102, lr_0 = 2.3071e-04
Loss = 8.0619e-03, PNorm = 181.6530, GNorm = 0.2400, lr_0 = 2.3055e-04
Loss = 8.7281e-03, PNorm = 181.6599, GNorm = 0.1371, lr_0 = 2.3039e-04
Loss = 7.6856e-03, PNorm = 181.6658, GNorm = 0.2361, lr_0 = 2.3024e-04
Loss = 1.0886e-02, PNorm = 181.6713, GNorm = 0.1296, lr_0 = 2.3008e-04
Loss = 1.2939e-02, PNorm = 181.6753, GNorm = 0.3446, lr_0 = 2.2992e-04
Loss = 6.1246e-03, PNorm = 181.6809, GNorm = 0.2561, lr_0 = 2.2976e-04
Loss = 1.0064e-02, PNorm = 181.6905, GNorm = 0.1542, lr_0 = 2.2961e-04
Loss = 6.2611e-03, PNorm = 181.6993, GNorm = 0.1951, lr_0 = 2.2945e-04
Loss = 1.0779e-02, PNorm = 181.7105, GNorm = 0.1039, lr_0 = 2.2929e-04
Loss = 6.6676e-03, PNorm = 181.7182, GNorm = 0.3287, lr_0 = 2.2913e-04
Loss = 7.0644e-03, PNorm = 181.7257, GNorm = 0.1592, lr_0 = 2.2898e-04
Loss = 6.8899e-03, PNorm = 181.7319, GNorm = 0.1756, lr_0 = 2.2882e-04
Loss = 6.0667e-03, PNorm = 181.7390, GNorm = 0.3811, lr_0 = 2.2866e-04
Loss = 5.9311e-03, PNorm = 181.7471, GNorm = 0.0668, lr_0 = 2.2851e-04
Loss = 6.5411e-03, PNorm = 181.7538, GNorm = 0.1205, lr_0 = 2.2835e-04
Loss = 5.6695e-03, PNorm = 181.7612, GNorm = 0.1459, lr_0 = 2.2819e-04
Loss = 4.1902e-03, PNorm = 181.7675, GNorm = 0.2761, lr_0 = 2.2804e-04
Loss = 1.0661e-02, PNorm = 181.7737, GNorm = 0.4740, lr_0 = 2.2788e-04
Loss = 8.9978e-03, PNorm = 181.7808, GNorm = 0.1280, lr_0 = 2.2773e-04
Loss = 1.1857e-02, PNorm = 181.7875, GNorm = 0.1743, lr_0 = 2.2757e-04
Validation mae = 0.121188
Epoch 20
Loss = 5.6909e-03, PNorm = 181.7917, GNorm = 0.1625, lr_0 = 2.2741e-04
Loss = 8.0866e-03, PNorm = 181.7969, GNorm = 0.1542, lr_0 = 2.2726e-04
Loss = 4.1612e-03, PNorm = 181.7999, GNorm = 0.1865, lr_0 = 2.2710e-04
Loss = 5.1383e-03, PNorm = 181.8052, GNorm = 0.1248, lr_0 = 2.2695e-04
Loss = 1.0720e-02, PNorm = 181.8133, GNorm = 0.1894, lr_0 = 2.2679e-04
Loss = 4.8602e-03, PNorm = 181.8191, GNorm = 0.1334, lr_0 = 2.2664e-04
Loss = 9.8426e-03, PNorm = 181.8241, GNorm = 0.3030, lr_0 = 2.2648e-04
Loss = 8.7892e-03, PNorm = 181.8293, GNorm = 0.0773, lr_0 = 2.2632e-04
Loss = 5.2268e-03, PNorm = 181.8351, GNorm = 0.4589, lr_0 = 2.2617e-04
Loss = 6.3300e-03, PNorm = 181.8402, GNorm = 0.0945, lr_0 = 2.2601e-04
Loss = 4.6598e-03, PNorm = 181.8437, GNorm = 0.1083, lr_0 = 2.2586e-04
Loss = 7.4604e-03, PNorm = 181.8488, GNorm = 0.1449, lr_0 = 2.2571e-04
Loss = 1.7835e-02, PNorm = 181.8550, GNorm = 0.1893, lr_0 = 2.2555e-04
Loss = 5.7549e-03, PNorm = 181.8589, GNorm = 0.1708, lr_0 = 2.2540e-04
Loss = 3.8266e-03, PNorm = 181.8629, GNorm = 0.0851, lr_0 = 2.2524e-04
Loss = 5.2923e-03, PNorm = 181.8651, GNorm = 0.2249, lr_0 = 2.2509e-04
Loss = 4.6116e-03, PNorm = 181.8686, GNorm = 0.0956, lr_0 = 2.2493e-04
Loss = 5.9414e-03, PNorm = 181.8728, GNorm = 0.3225, lr_0 = 2.2478e-04
Loss = 7.6788e-03, PNorm = 181.8772, GNorm = 0.1323, lr_0 = 2.2463e-04
Loss = 3.8299e-03, PNorm = 181.8810, GNorm = 0.2123, lr_0 = 2.2447e-04
Loss = 6.5328e-03, PNorm = 181.8852, GNorm = 0.3035, lr_0 = 2.2432e-04
Loss = 4.2935e-03, PNorm = 181.8916, GNorm = 0.2604, lr_0 = 2.2416e-04
Loss = 5.0804e-03, PNorm = 181.8958, GNorm = 0.3327, lr_0 = 2.2401e-04
Loss = 5.1598e-03, PNorm = 181.9005, GNorm = 0.1394, lr_0 = 2.2386e-04
Loss = 5.4378e-03, PNorm = 181.9047, GNorm = 0.1845, lr_0 = 2.2370e-04
Loss = 5.2029e-03, PNorm = 181.9106, GNorm = 0.0977, lr_0 = 2.2355e-04
Loss = 3.9926e-03, PNorm = 181.9165, GNorm = 0.0877, lr_0 = 2.2340e-04
Loss = 7.9157e-03, PNorm = 181.9203, GNorm = 0.1779, lr_0 = 2.2324e-04
Loss = 5.6511e-03, PNorm = 181.9243, GNorm = 0.1478, lr_0 = 2.2309e-04
Loss = 4.2192e-03, PNorm = 181.9294, GNorm = 0.3298, lr_0 = 2.2294e-04
Loss = 4.1993e-03, PNorm = 181.9333, GNorm = 0.2843, lr_0 = 2.2279e-04
Loss = 7.3174e-03, PNorm = 181.9382, GNorm = 0.3757, lr_0 = 2.2263e-04
Loss = 7.1888e-03, PNorm = 181.9441, GNorm = 0.2216, lr_0 = 2.2248e-04
Loss = 8.7329e-03, PNorm = 181.9488, GNorm = 0.2068, lr_0 = 2.2233e-04
Loss = 8.0784e-03, PNorm = 181.9552, GNorm = 0.1516, lr_0 = 2.2218e-04
Loss = 3.7957e-03, PNorm = 181.9597, GNorm = 0.1233, lr_0 = 2.2202e-04
Loss = 5.7550e-03, PNorm = 181.9640, GNorm = 0.1202, lr_0 = 2.2187e-04
Loss = 7.8576e-03, PNorm = 181.9705, GNorm = 0.2514, lr_0 = 2.2172e-04
Loss = 5.2949e-03, PNorm = 181.9763, GNorm = 0.0687, lr_0 = 2.2157e-04
Loss = 9.6347e-03, PNorm = 181.9814, GNorm = 0.1144, lr_0 = 2.2142e-04
Loss = 4.3805e-03, PNorm = 181.9864, GNorm = 0.1168, lr_0 = 2.2126e-04
Loss = 6.3504e-03, PNorm = 181.9931, GNorm = 0.2482, lr_0 = 2.2111e-04
Loss = 7.9118e-03, PNorm = 181.9964, GNorm = 0.4079, lr_0 = 2.2096e-04
Loss = 6.1921e-03, PNorm = 182.0011, GNorm = 0.1013, lr_0 = 2.2081e-04
Loss = 8.3812e-03, PNorm = 182.0037, GNorm = 0.2310, lr_0 = 2.2066e-04
Loss = 4.3126e-03, PNorm = 182.0090, GNorm = 0.2275, lr_0 = 2.2051e-04
Loss = 4.2028e-03, PNorm = 182.0144, GNorm = 0.0902, lr_0 = 2.2036e-04
Loss = 3.9405e-03, PNorm = 182.0207, GNorm = 0.1573, lr_0 = 2.2021e-04
Loss = 3.5586e-03, PNorm = 182.0267, GNorm = 0.1604, lr_0 = 2.2005e-04
Loss = 1.0051e-02, PNorm = 182.0315, GNorm = 0.8767, lr_0 = 2.1990e-04
Loss = 4.6670e-03, PNorm = 182.0370, GNorm = 0.2564, lr_0 = 2.1975e-04
Loss = 1.1938e-02, PNorm = 182.0425, GNorm = 1.5877, lr_0 = 2.1960e-04
Loss = 6.0029e-03, PNorm = 182.0449, GNorm = 0.2419, lr_0 = 2.1945e-04
Loss = 4.7356e-03, PNorm = 182.0497, GNorm = 0.1323, lr_0 = 2.1930e-04
Loss = 4.8029e-03, PNorm = 182.0559, GNorm = 0.1282, lr_0 = 2.1915e-04
Loss = 5.0403e-03, PNorm = 182.0605, GNorm = 0.0834, lr_0 = 2.1900e-04
Loss = 1.0212e-02, PNorm = 182.0659, GNorm = 0.5821, lr_0 = 2.1885e-04
Loss = 6.5690e-03, PNorm = 182.0724, GNorm = 0.3317, lr_0 = 2.1870e-04
Loss = 5.8687e-03, PNorm = 182.0782, GNorm = 0.2686, lr_0 = 2.1855e-04
Loss = 3.6510e-03, PNorm = 182.0839, GNorm = 0.2189, lr_0 = 2.1840e-04
Loss = 4.1370e-03, PNorm = 182.0883, GNorm = 0.1787, lr_0 = 2.1825e-04
Loss = 1.0213e-02, PNorm = 182.0917, GNorm = 0.1255, lr_0 = 2.1810e-04
Loss = 4.3977e-03, PNorm = 182.0991, GNorm = 0.2405, lr_0 = 2.1795e-04
Loss = 5.4082e-03, PNorm = 182.1047, GNorm = 0.1673, lr_0 = 2.1780e-04
Loss = 4.4910e-03, PNorm = 182.1094, GNorm = 0.2751, lr_0 = 2.1765e-04
Loss = 4.7218e-03, PNorm = 182.1156, GNorm = 0.1933, lr_0 = 2.1751e-04
Loss = 7.0182e-03, PNorm = 182.1186, GNorm = 0.1086, lr_0 = 2.1736e-04
Loss = 3.6969e-03, PNorm = 182.1242, GNorm = 0.1441, lr_0 = 2.1721e-04
Loss = 7.2703e-03, PNorm = 182.1296, GNorm = 0.1490, lr_0 = 2.1706e-04
Loss = 1.0179e-02, PNorm = 182.1335, GNorm = 0.1772, lr_0 = 2.1691e-04
Loss = 4.1158e-03, PNorm = 182.1394, GNorm = 0.2815, lr_0 = 2.1676e-04
Loss = 8.9391e-03, PNorm = 182.1424, GNorm = 0.2902, lr_0 = 2.1661e-04
Loss = 4.9235e-03, PNorm = 182.1477, GNorm = 0.1221, lr_0 = 2.1646e-04
Loss = 5.9842e-03, PNorm = 182.1525, GNorm = 0.1478, lr_0 = 2.1632e-04
Loss = 5.5202e-03, PNorm = 182.1577, GNorm = 0.1380, lr_0 = 2.1617e-04
Loss = 7.3181e-03, PNorm = 182.1653, GNorm = 0.1350, lr_0 = 2.1602e-04
Loss = 4.7769e-03, PNorm = 182.1707, GNorm = 0.1465, lr_0 = 2.1587e-04
Loss = 4.4302e-03, PNorm = 182.1757, GNorm = 0.0916, lr_0 = 2.1572e-04
Loss = 9.3900e-03, PNorm = 182.1858, GNorm = 0.3473, lr_0 = 2.1558e-04
Loss = 6.1368e-03, PNorm = 182.1923, GNorm = 0.0804, lr_0 = 2.1543e-04
Loss = 7.7633e-03, PNorm = 182.1974, GNorm = 0.2320, lr_0 = 2.1528e-04
Loss = 5.1079e-03, PNorm = 182.2031, GNorm = 0.1624, lr_0 = 2.1513e-04
Loss = 3.8336e-03, PNorm = 182.2077, GNorm = 0.0834, lr_0 = 2.1499e-04
Loss = 5.7249e-03, PNorm = 182.2117, GNorm = 0.1475, lr_0 = 2.1484e-04
Loss = 5.7753e-03, PNorm = 182.2160, GNorm = 0.2932, lr_0 = 2.1469e-04
Loss = 4.0535e-03, PNorm = 182.2216, GNorm = 0.2191, lr_0 = 2.1454e-04
Loss = 6.0692e-03, PNorm = 182.2292, GNorm = 0.1212, lr_0 = 2.1440e-04
Loss = 4.1026e-03, PNorm = 182.2355, GNorm = 0.1444, lr_0 = 2.1425e-04
Loss = 4.0383e-03, PNorm = 182.2418, GNorm = 0.0770, lr_0 = 2.1410e-04
Loss = 3.3999e-03, PNorm = 182.2470, GNorm = 0.1795, lr_0 = 2.1396e-04
Loss = 1.0672e-02, PNorm = 182.2518, GNorm = 0.2282, lr_0 = 2.1381e-04
Loss = 1.2756e-02, PNorm = 182.2588, GNorm = 0.2641, lr_0 = 2.1366e-04
Loss = 8.5677e-03, PNorm = 182.2649, GNorm = 0.1718, lr_0 = 2.1352e-04
Loss = 6.2577e-03, PNorm = 182.2714, GNorm = 0.3700, lr_0 = 2.1337e-04
Loss = 7.2373e-03, PNorm = 182.2791, GNorm = 0.1203, lr_0 = 2.1323e-04
Loss = 4.2422e-03, PNorm = 182.2839, GNorm = 0.0753, lr_0 = 2.1308e-04
Loss = 4.8828e-03, PNorm = 182.2898, GNorm = 0.1610, lr_0 = 2.1293e-04
Loss = 7.3173e-03, PNorm = 182.2962, GNorm = 0.1797, lr_0 = 2.1279e-04
Loss = 6.5320e-03, PNorm = 182.3031, GNorm = 0.3699, lr_0 = 2.1264e-04
Loss = 9.0057e-03, PNorm = 182.3079, GNorm = 0.1216, lr_0 = 2.1250e-04
Loss = 1.0121e-02, PNorm = 182.3141, GNorm = 0.2289, lr_0 = 2.1235e-04
Loss = 5.4036e-03, PNorm = 182.3204, GNorm = 0.3218, lr_0 = 2.1221e-04
Loss = 1.2846e-02, PNorm = 182.3256, GNorm = 0.2011, lr_0 = 2.1206e-04
Loss = 4.1516e-03, PNorm = 182.3321, GNorm = 0.0790, lr_0 = 2.1191e-04
Loss = 3.9095e-03, PNorm = 182.3376, GNorm = 0.1094, lr_0 = 2.1177e-04
Loss = 5.7842e-03, PNorm = 182.3418, GNorm = 0.7874, lr_0 = 2.1162e-04
Loss = 5.9663e-03, PNorm = 182.3457, GNorm = 0.1876, lr_0 = 2.1148e-04
Loss = 4.7506e-03, PNorm = 182.3528, GNorm = 0.1234, lr_0 = 2.1133e-04
Loss = 4.5495e-03, PNorm = 182.3578, GNorm = 0.2436, lr_0 = 2.1119e-04
Loss = 9.2129e-03, PNorm = 182.3616, GNorm = 0.1228, lr_0 = 2.1104e-04
Loss = 7.6505e-03, PNorm = 182.3668, GNorm = 0.0888, lr_0 = 2.1090e-04
Loss = 6.6679e-03, PNorm = 182.3714, GNorm = 0.2488, lr_0 = 2.1076e-04
Loss = 6.4247e-03, PNorm = 182.3779, GNorm = 0.4219, lr_0 = 2.1061e-04
Loss = 8.9684e-03, PNorm = 182.3857, GNorm = 0.1380, lr_0 = 2.1047e-04
Loss = 4.8320e-03, PNorm = 182.3930, GNorm = 0.1461, lr_0 = 2.1032e-04
Loss = 7.8975e-03, PNorm = 182.3976, GNorm = 0.3600, lr_0 = 2.1018e-04
Loss = 6.0463e-03, PNorm = 182.4024, GNorm = 0.1586, lr_0 = 2.1003e-04
Loss = 8.3979e-03, PNorm = 182.4093, GNorm = 0.1193, lr_0 = 2.0989e-04
Loss = 6.6358e-03, PNorm = 182.4167, GNorm = 0.1496, lr_0 = 2.0975e-04
Loss = 6.9490e-03, PNorm = 182.4236, GNorm = 0.1619, lr_0 = 2.0960e-04
Validation mae = 0.120829
Epoch 21
Loss = 1.0104e-02, PNorm = 182.4272, GNorm = 0.0894, lr_0 = 2.0946e-04
Loss = 4.0374e-03, PNorm = 182.4301, GNorm = 0.2320, lr_0 = 2.0932e-04
Loss = 7.3280e-03, PNorm = 182.4336, GNorm = 0.1402, lr_0 = 2.0917e-04
Loss = 7.1892e-03, PNorm = 182.4374, GNorm = 0.1696, lr_0 = 2.0903e-04
Loss = 4.6344e-03, PNorm = 182.4417, GNorm = 0.1406, lr_0 = 2.0889e-04
Loss = 8.3198e-03, PNorm = 182.4452, GNorm = 0.2525, lr_0 = 2.0874e-04
Loss = 4.7642e-03, PNorm = 182.4495, GNorm = 0.1915, lr_0 = 2.0860e-04
Loss = 4.6070e-03, PNorm = 182.4524, GNorm = 0.1363, lr_0 = 2.0846e-04
Loss = 5.6040e-03, PNorm = 182.4563, GNorm = 0.1149, lr_0 = 2.0831e-04
Loss = 5.2545e-03, PNorm = 182.4609, GNorm = 0.3680, lr_0 = 2.0817e-04
Loss = 6.7453e-03, PNorm = 182.4637, GNorm = 0.1957, lr_0 = 2.0803e-04
Loss = 3.7665e-03, PNorm = 182.4658, GNorm = 0.1656, lr_0 = 2.0789e-04
Loss = 5.4488e-03, PNorm = 182.4715, GNorm = 0.2258, lr_0 = 2.0774e-04
Loss = 6.1210e-03, PNorm = 182.4772, GNorm = 0.1664, lr_0 = 2.0760e-04
Loss = 9.8209e-03, PNorm = 182.4838, GNorm = 0.2245, lr_0 = 2.0746e-04
Loss = 5.1995e-03, PNorm = 182.4917, GNorm = 0.1406, lr_0 = 2.0732e-04
Loss = 4.3983e-03, PNorm = 182.4971, GNorm = 0.2263, lr_0 = 2.0718e-04
Loss = 4.4045e-03, PNorm = 182.4996, GNorm = 0.0894, lr_0 = 2.0703e-04
Loss = 4.4897e-03, PNorm = 182.5020, GNorm = 0.1612, lr_0 = 2.0689e-04
Loss = 4.6314e-03, PNorm = 182.5072, GNorm = 0.1592, lr_0 = 2.0675e-04
Loss = 3.8390e-03, PNorm = 182.5092, GNorm = 0.1296, lr_0 = 2.0661e-04
Loss = 4.5840e-03, PNorm = 182.5120, GNorm = 0.1516, lr_0 = 2.0647e-04
Loss = 6.8126e-03, PNorm = 182.5175, GNorm = 0.1590, lr_0 = 2.0633e-04
Loss = 5.9056e-03, PNorm = 182.5226, GNorm = 0.1547, lr_0 = 2.0618e-04
Loss = 4.6308e-03, PNorm = 182.5263, GNorm = 0.1664, lr_0 = 2.0604e-04
Loss = 4.5257e-03, PNorm = 182.5309, GNorm = 0.1418, lr_0 = 2.0590e-04
Loss = 9.7195e-03, PNorm = 182.5357, GNorm = 1.5141, lr_0 = 2.0576e-04
Loss = 5.6844e-03, PNorm = 182.5394, GNorm = 0.1564, lr_0 = 2.0562e-04
Loss = 4.2823e-03, PNorm = 182.5441, GNorm = 0.3766, lr_0 = 2.0548e-04
Loss = 4.7726e-03, PNorm = 182.5484, GNorm = 0.1640, lr_0 = 2.0534e-04
Loss = 7.7070e-03, PNorm = 182.5503, GNorm = 0.1034, lr_0 = 2.0520e-04
Loss = 6.5934e-03, PNorm = 182.5551, GNorm = 0.2622, lr_0 = 2.0506e-04
Loss = 7.6172e-03, PNorm = 182.5615, GNorm = 0.1571, lr_0 = 2.0492e-04
Loss = 9.1125e-03, PNorm = 182.5629, GNorm = 0.3115, lr_0 = 2.0478e-04
Loss = 5.5577e-03, PNorm = 182.5689, GNorm = 0.4483, lr_0 = 2.0464e-04
Loss = 4.0202e-03, PNorm = 182.5730, GNorm = 0.1290, lr_0 = 2.0450e-04
Loss = 5.0716e-03, PNorm = 182.5771, GNorm = 0.1879, lr_0 = 2.0436e-04
Loss = 3.7432e-03, PNorm = 182.5841, GNorm = 0.1964, lr_0 = 2.0422e-04
Loss = 5.9875e-03, PNorm = 182.5877, GNorm = 0.1288, lr_0 = 2.0408e-04
Loss = 4.1811e-03, PNorm = 182.5941, GNorm = 0.2027, lr_0 = 2.0394e-04
Loss = 7.9101e-03, PNorm = 182.5996, GNorm = 1.2211, lr_0 = 2.0380e-04
Loss = 5.8969e-03, PNorm = 182.6042, GNorm = 0.2023, lr_0 = 2.0366e-04
Loss = 1.0572e-02, PNorm = 182.6111, GNorm = 0.2001, lr_0 = 2.0352e-04
Loss = 5.2993e-03, PNorm = 182.6151, GNorm = 0.1856, lr_0 = 2.0338e-04
Loss = 5.1744e-03, PNorm = 182.6203, GNorm = 0.2574, lr_0 = 2.0324e-04
Loss = 5.0994e-03, PNorm = 182.6252, GNorm = 0.3528, lr_0 = 2.0310e-04
Loss = 3.4852e-03, PNorm = 182.6301, GNorm = 0.1223, lr_0 = 2.0296e-04
Loss = 4.0376e-03, PNorm = 182.6325, GNorm = 0.1015, lr_0 = 2.0282e-04
Loss = 6.0645e-03, PNorm = 182.6350, GNorm = 0.0866, lr_0 = 2.0268e-04
Loss = 5.8253e-03, PNorm = 182.6381, GNorm = 0.1579, lr_0 = 2.0254e-04
Loss = 5.5594e-03, PNorm = 182.6405, GNorm = 0.3514, lr_0 = 2.0240e-04
Loss = 6.4221e-03, PNorm = 182.6451, GNorm = 0.0995, lr_0 = 2.0227e-04
Loss = 4.3149e-03, PNorm = 182.6508, GNorm = 0.3533, lr_0 = 2.0213e-04
Loss = 3.6953e-03, PNorm = 182.6562, GNorm = 0.2260, lr_0 = 2.0199e-04
Loss = 5.3302e-03, PNorm = 182.6602, GNorm = 0.1712, lr_0 = 2.0185e-04
Loss = 3.6233e-03, PNorm = 182.6647, GNorm = 0.2663, lr_0 = 2.0171e-04
Loss = 3.4740e-03, PNorm = 182.6672, GNorm = 0.1582, lr_0 = 2.0157e-04
Loss = 4.7716e-03, PNorm = 182.6689, GNorm = 0.1767, lr_0 = 2.0144e-04
Loss = 6.0019e-03, PNorm = 182.6730, GNorm = 0.0988, lr_0 = 2.0130e-04
Loss = 3.1785e-03, PNorm = 182.6775, GNorm = 0.1928, lr_0 = 2.0116e-04
Loss = 5.7762e-03, PNorm = 182.6815, GNorm = 0.2312, lr_0 = 2.0102e-04
Loss = 3.4542e-03, PNorm = 182.6866, GNorm = 0.1288, lr_0 = 2.0088e-04
Loss = 6.5384e-03, PNorm = 182.6894, GNorm = 0.0712, lr_0 = 2.0075e-04
Loss = 5.6799e-03, PNorm = 182.6940, GNorm = 0.1340, lr_0 = 2.0061e-04
Loss = 3.6273e-03, PNorm = 182.7013, GNorm = 0.2241, lr_0 = 2.0047e-04
Loss = 4.4418e-03, PNorm = 182.7074, GNorm = 0.1868, lr_0 = 2.0033e-04
Loss = 3.4833e-03, PNorm = 182.7103, GNorm = 0.1909, lr_0 = 2.0020e-04
Loss = 8.8586e-03, PNorm = 182.7156, GNorm = 0.0960, lr_0 = 2.0006e-04
Loss = 7.9886e-03, PNorm = 182.7157, GNorm = 0.2113, lr_0 = 1.9992e-04
Loss = 1.0555e-02, PNorm = 182.7194, GNorm = 0.0946, lr_0 = 1.9979e-04
Loss = 4.4402e-03, PNorm = 182.7226, GNorm = 0.1584, lr_0 = 1.9965e-04
Loss = 3.8126e-03, PNorm = 182.7283, GNorm = 0.0787, lr_0 = 1.9951e-04
Loss = 5.3388e-03, PNorm = 182.7331, GNorm = 0.1403, lr_0 = 1.9938e-04
Loss = 5.9109e-03, PNorm = 182.7385, GNorm = 0.7977, lr_0 = 1.9924e-04
Loss = 9.5967e-03, PNorm = 182.7446, GNorm = 0.1593, lr_0 = 1.9910e-04
Loss = 6.3921e-03, PNorm = 182.7509, GNorm = 0.2808, lr_0 = 1.9897e-04
Loss = 8.0583e-03, PNorm = 182.7559, GNorm = 0.3995, lr_0 = 1.9883e-04
Loss = 3.9784e-03, PNorm = 182.7603, GNorm = 0.1517, lr_0 = 1.9869e-04
Loss = 4.7956e-03, PNorm = 182.7657, GNorm = 0.2258, lr_0 = 1.9856e-04
Loss = 8.5614e-03, PNorm = 182.7706, GNorm = 0.1296, lr_0 = 1.9842e-04
Loss = 6.0789e-03, PNorm = 182.7782, GNorm = 0.2230, lr_0 = 1.9829e-04
Loss = 5.1939e-03, PNorm = 182.7810, GNorm = 0.1474, lr_0 = 1.9815e-04
Loss = 3.8195e-03, PNorm = 182.7862, GNorm = 0.1266, lr_0 = 1.9801e-04
Loss = 4.0267e-03, PNorm = 182.7929, GNorm = 0.1270, lr_0 = 1.9788e-04
Loss = 4.9444e-03, PNorm = 182.7960, GNorm = 0.1583, lr_0 = 1.9774e-04
Loss = 3.9576e-03, PNorm = 182.7991, GNorm = 0.1097, lr_0 = 1.9761e-04
Loss = 7.0599e-03, PNorm = 182.8020, GNorm = 0.2076, lr_0 = 1.9747e-04
Loss = 4.6862e-03, PNorm = 182.8047, GNorm = 0.2001, lr_0 = 1.9734e-04
Loss = 7.4408e-03, PNorm = 182.8080, GNorm = 0.3170, lr_0 = 1.9720e-04
Loss = 5.0813e-03, PNorm = 182.8134, GNorm = 0.1881, lr_0 = 1.9707e-04
Loss = 3.4849e-03, PNorm = 182.8182, GNorm = 0.1092, lr_0 = 1.9693e-04
Loss = 6.8950e-03, PNorm = 182.8228, GNorm = 0.1354, lr_0 = 1.9680e-04
Loss = 3.9566e-03, PNorm = 182.8255, GNorm = 0.1324, lr_0 = 1.9666e-04
Loss = 6.3437e-03, PNorm = 182.8301, GNorm = 0.1103, lr_0 = 1.9653e-04
Loss = 4.7539e-03, PNorm = 182.8346, GNorm = 0.1624, lr_0 = 1.9639e-04
Loss = 3.6139e-03, PNorm = 182.8389, GNorm = 0.1076, lr_0 = 1.9626e-04
Loss = 4.5061e-03, PNorm = 182.8445, GNorm = 0.0959, lr_0 = 1.9612e-04
Loss = 8.7940e-03, PNorm = 182.8485, GNorm = 0.1113, lr_0 = 1.9599e-04
Loss = 3.9149e-03, PNorm = 182.8552, GNorm = 0.1084, lr_0 = 1.9585e-04
Loss = 5.7675e-03, PNorm = 182.8611, GNorm = 0.0992, lr_0 = 1.9572e-04
Loss = 6.7196e-03, PNorm = 182.8664, GNorm = 0.1819, lr_0 = 1.9559e-04
Loss = 5.0153e-03, PNorm = 182.8722, GNorm = 0.1905, lr_0 = 1.9545e-04
Loss = 3.6490e-03, PNorm = 182.8756, GNorm = 0.1305, lr_0 = 1.9532e-04
Loss = 4.2199e-03, PNorm = 182.8791, GNorm = 0.3554, lr_0 = 1.9518e-04
Loss = 3.7588e-03, PNorm = 182.8847, GNorm = 0.2043, lr_0 = 1.9505e-04
Loss = 5.1374e-03, PNorm = 182.8904, GNorm = 0.1490, lr_0 = 1.9492e-04
Loss = 3.2922e-03, PNorm = 182.8946, GNorm = 0.2160, lr_0 = 1.9478e-04
Loss = 6.3166e-03, PNorm = 182.8988, GNorm = 0.2113, lr_0 = 1.9465e-04
Loss = 8.5929e-03, PNorm = 182.9019, GNorm = 0.1015, lr_0 = 1.9452e-04
Loss = 5.2821e-03, PNorm = 182.9046, GNorm = 0.1567, lr_0 = 1.9438e-04
Loss = 4.2544e-03, PNorm = 182.9091, GNorm = 0.0822, lr_0 = 1.9425e-04
Loss = 2.2343e-02, PNorm = 182.9172, GNorm = 0.4314, lr_0 = 1.9412e-04
Loss = 9.8544e-03, PNorm = 182.9204, GNorm = 0.1268, lr_0 = 1.9398e-04
Loss = 4.9108e-03, PNorm = 182.9246, GNorm = 0.1396, lr_0 = 1.9385e-04
Loss = 9.5099e-03, PNorm = 182.9292, GNorm = 0.1107, lr_0 = 1.9372e-04
Loss = 7.8579e-03, PNorm = 182.9344, GNorm = 0.3681, lr_0 = 1.9359e-04
Loss = 3.8707e-03, PNorm = 182.9398, GNorm = 0.0946, lr_0 = 1.9345e-04
Loss = 3.8244e-03, PNorm = 182.9448, GNorm = 0.0896, lr_0 = 1.9332e-04
Loss = 4.3126e-03, PNorm = 182.9486, GNorm = 0.2525, lr_0 = 1.9319e-04
Loss = 4.2017e-03, PNorm = 182.9527, GNorm = 0.2241, lr_0 = 1.9306e-04
Validation mae = 0.120924
Epoch 22
Loss = 4.8392e-03, PNorm = 182.9541, GNorm = 0.1734, lr_0 = 1.9292e-04
Loss = 3.2324e-03, PNorm = 182.9568, GNorm = 0.3238, lr_0 = 1.9279e-04
Loss = 3.3657e-03, PNorm = 182.9604, GNorm = 0.2786, lr_0 = 1.9266e-04
Loss = 3.7966e-03, PNorm = 182.9630, GNorm = 0.2749, lr_0 = 1.9253e-04
Loss = 3.3675e-03, PNorm = 182.9667, GNorm = 0.1351, lr_0 = 1.9240e-04
Loss = 6.2264e-03, PNorm = 182.9718, GNorm = 0.1537, lr_0 = 1.9226e-04
Loss = 7.4032e-03, PNorm = 182.9764, GNorm = 0.1863, lr_0 = 1.9213e-04
Loss = 4.1556e-03, PNorm = 182.9800, GNorm = 0.1204, lr_0 = 1.9200e-04
Loss = 1.0066e-02, PNorm = 182.9862, GNorm = 0.1252, lr_0 = 1.9187e-04
Loss = 5.4561e-03, PNorm = 182.9901, GNorm = 0.3331, lr_0 = 1.9174e-04
Loss = 5.2623e-03, PNorm = 182.9936, GNorm = 0.1676, lr_0 = 1.9161e-04
Loss = 3.7174e-03, PNorm = 182.9971, GNorm = 0.1307, lr_0 = 1.9148e-04
Loss = 4.7392e-03, PNorm = 182.9986, GNorm = 0.0587, lr_0 = 1.9134e-04
Loss = 7.5163e-03, PNorm = 183.0008, GNorm = 0.0735, lr_0 = 1.9121e-04
Loss = 8.7850e-03, PNorm = 183.0047, GNorm = 0.1582, lr_0 = 1.9108e-04
Loss = 4.4455e-03, PNorm = 183.0092, GNorm = 0.0874, lr_0 = 1.9095e-04
Loss = 4.3914e-03, PNorm = 183.0147, GNorm = 0.2070, lr_0 = 1.9082e-04
Loss = 8.5603e-03, PNorm = 183.0182, GNorm = 0.1319, lr_0 = 1.9069e-04
Loss = 4.4000e-03, PNorm = 183.0227, GNorm = 0.0910, lr_0 = 1.9056e-04
Loss = 7.1742e-03, PNorm = 183.0263, GNorm = 0.1142, lr_0 = 1.9043e-04
Loss = 3.5677e-03, PNorm = 183.0300, GNorm = 0.1382, lr_0 = 1.9030e-04
Loss = 2.8763e-03, PNorm = 183.0335, GNorm = 0.1038, lr_0 = 1.9017e-04
Loss = 3.3359e-03, PNorm = 183.0379, GNorm = 0.1412, lr_0 = 1.9004e-04
Loss = 2.7093e-03, PNorm = 183.0415, GNorm = 0.1093, lr_0 = 1.8991e-04
Loss = 4.8133e-03, PNorm = 183.0446, GNorm = 0.1748, lr_0 = 1.8978e-04
Loss = 3.5716e-03, PNorm = 183.0487, GNorm = 0.1208, lr_0 = 1.8965e-04
Loss = 2.6680e-03, PNorm = 183.0527, GNorm = 0.1850, lr_0 = 1.8952e-04
Loss = 6.4219e-03, PNorm = 183.0551, GNorm = 0.1364, lr_0 = 1.8939e-04
Loss = 5.7885e-03, PNorm = 183.0569, GNorm = 0.1198, lr_0 = 1.8926e-04
Loss = 5.3322e-03, PNorm = 183.0614, GNorm = 0.2228, lr_0 = 1.8913e-04
Loss = 3.6153e-03, PNorm = 183.0647, GNorm = 0.2091, lr_0 = 1.8900e-04
Loss = 4.7184e-03, PNorm = 183.0674, GNorm = 0.3820, lr_0 = 1.8887e-04
Loss = 6.4058e-03, PNorm = 183.0699, GNorm = 0.1286, lr_0 = 1.8874e-04
Loss = 6.7350e-03, PNorm = 183.0727, GNorm = 0.2246, lr_0 = 1.8861e-04
Loss = 3.2105e-03, PNorm = 183.0765, GNorm = 0.0726, lr_0 = 1.8848e-04
Loss = 4.0690e-03, PNorm = 183.0826, GNorm = 0.1416, lr_0 = 1.8835e-04
Loss = 3.1080e-03, PNorm = 183.0880, GNorm = 0.1208, lr_0 = 1.8822e-04
Loss = 3.1227e-03, PNorm = 183.0907, GNorm = 0.1451, lr_0 = 1.8809e-04
Loss = 3.7566e-03, PNorm = 183.0958, GNorm = 0.1173, lr_0 = 1.8797e-04
Loss = 4.5311e-03, PNorm = 183.1005, GNorm = 0.1585, lr_0 = 1.8784e-04
Loss = 4.6624e-03, PNorm = 183.1049, GNorm = 0.0757, lr_0 = 1.8771e-04
Loss = 3.3828e-03, PNorm = 183.1082, GNorm = 0.2214, lr_0 = 1.8758e-04
Loss = 4.5662e-03, PNorm = 183.1110, GNorm = 0.1290, lr_0 = 1.8745e-04
Loss = 4.5704e-03, PNorm = 183.1134, GNorm = 0.1419, lr_0 = 1.8732e-04
Loss = 2.8044e-03, PNorm = 183.1162, GNorm = 0.1315, lr_0 = 1.8719e-04
Loss = 3.2235e-03, PNorm = 183.1184, GNorm = 0.1679, lr_0 = 1.8707e-04
Loss = 4.7023e-03, PNorm = 183.1229, GNorm = 0.3056, lr_0 = 1.8694e-04
Loss = 3.9730e-03, PNorm = 183.1277, GNorm = 0.2050, lr_0 = 1.8681e-04
Loss = 2.6512e-03, PNorm = 183.1314, GNorm = 0.1446, lr_0 = 1.8668e-04
Loss = 6.6890e-03, PNorm = 183.1350, GNorm = 0.1445, lr_0 = 1.8655e-04
Loss = 4.4199e-03, PNorm = 183.1389, GNorm = 0.1426, lr_0 = 1.8643e-04
Loss = 4.5155e-03, PNorm = 183.1417, GNorm = 0.1385, lr_0 = 1.8630e-04
Loss = 3.2004e-03, PNorm = 183.1453, GNorm = 0.0940, lr_0 = 1.8617e-04
Loss = 2.9030e-03, PNorm = 183.1493, GNorm = 0.1761, lr_0 = 1.8604e-04
Loss = 4.2352e-03, PNorm = 183.1501, GNorm = 0.1213, lr_0 = 1.8592e-04
Loss = 3.6634e-03, PNorm = 183.1527, GNorm = 0.1278, lr_0 = 1.8579e-04
Loss = 6.1878e-03, PNorm = 183.1573, GNorm = 0.1686, lr_0 = 1.8566e-04
Loss = 3.4294e-03, PNorm = 183.1615, GNorm = 0.4348, lr_0 = 1.8553e-04
Loss = 3.2093e-03, PNorm = 183.1672, GNorm = 0.3421, lr_0 = 1.8541e-04
Loss = 4.1893e-03, PNorm = 183.1693, GNorm = 0.1182, lr_0 = 1.8528e-04
Loss = 6.5598e-03, PNorm = 183.1738, GNorm = 0.4011, lr_0 = 1.8515e-04
Loss = 8.7376e-03, PNorm = 183.1800, GNorm = 0.1814, lr_0 = 1.8503e-04
Loss = 5.6125e-03, PNorm = 183.1830, GNorm = 0.2285, lr_0 = 1.8490e-04
Loss = 4.7025e-03, PNorm = 183.1866, GNorm = 0.1804, lr_0 = 1.8477e-04
Loss = 5.5831e-03, PNorm = 183.1905, GNorm = 0.3104, lr_0 = 1.8465e-04
Loss = 2.6443e-03, PNorm = 183.1953, GNorm = 0.0675, lr_0 = 1.8452e-04
Loss = 7.4010e-03, PNorm = 183.1989, GNorm = 0.2331, lr_0 = 1.8439e-04
Loss = 4.1108e-03, PNorm = 183.2031, GNorm = 0.1391, lr_0 = 1.8427e-04
Loss = 4.7281e-03, PNorm = 183.2065, GNorm = 0.0914, lr_0 = 1.8414e-04
Loss = 1.9211e-02, PNorm = 183.2108, GNorm = 0.1774, lr_0 = 1.8401e-04
Loss = 5.9131e-03, PNorm = 183.2138, GNorm = 0.1269, lr_0 = 1.8389e-04
Loss = 3.6982e-03, PNorm = 183.2191, GNorm = 0.0912, lr_0 = 1.8376e-04
Loss = 8.2791e-03, PNorm = 183.2223, GNorm = 0.0716, lr_0 = 1.8364e-04
Loss = 6.1669e-03, PNorm = 183.2254, GNorm = 0.1146, lr_0 = 1.8351e-04
Loss = 3.6935e-03, PNorm = 183.2286, GNorm = 0.1983, lr_0 = 1.8338e-04
Loss = 4.5970e-03, PNorm = 183.2312, GNorm = 0.1386, lr_0 = 1.8326e-04
Loss = 5.9478e-03, PNorm = 183.2336, GNorm = 0.1236, lr_0 = 1.8313e-04
Loss = 3.7062e-03, PNorm = 183.2359, GNorm = 0.1166, lr_0 = 1.8301e-04
Loss = 5.6463e-03, PNorm = 183.2390, GNorm = 0.0972, lr_0 = 1.8288e-04
Loss = 3.6944e-03, PNorm = 183.2423, GNorm = 0.1150, lr_0 = 1.8276e-04
Loss = 2.7976e-03, PNorm = 183.2458, GNorm = 0.4007, lr_0 = 1.8263e-04
Loss = 4.5491e-03, PNorm = 183.2480, GNorm = 0.1055, lr_0 = 1.8251e-04
Loss = 5.5466e-03, PNorm = 183.2523, GNorm = 0.1164, lr_0 = 1.8238e-04
Loss = 6.1082e-03, PNorm = 183.2590, GNorm = 0.1955, lr_0 = 1.8226e-04
Loss = 1.5371e-02, PNorm = 183.2624, GNorm = 1.2008, lr_0 = 1.8213e-04
Loss = 5.2789e-03, PNorm = 183.2681, GNorm = 0.2068, lr_0 = 1.8201e-04
Loss = 1.0819e-02, PNorm = 183.2726, GNorm = 0.0753, lr_0 = 1.8188e-04
Loss = 7.2490e-03, PNorm = 183.2767, GNorm = 0.2133, lr_0 = 1.8176e-04
Loss = 3.4758e-03, PNorm = 183.2801, GNorm = 0.2347, lr_0 = 1.8163e-04
Loss = 5.3885e-03, PNorm = 183.2817, GNorm = 0.2645, lr_0 = 1.8151e-04
Loss = 3.6279e-03, PNorm = 183.2864, GNorm = 0.1746, lr_0 = 1.8138e-04
Loss = 5.7871e-03, PNorm = 183.2930, GNorm = 0.2062, lr_0 = 1.8126e-04
Loss = 5.7605e-03, PNorm = 183.2973, GNorm = 0.1572, lr_0 = 1.8114e-04
Loss = 4.2088e-03, PNorm = 183.3005, GNorm = 0.0817, lr_0 = 1.8101e-04
Loss = 7.7549e-03, PNorm = 183.3047, GNorm = 0.1040, lr_0 = 1.8089e-04
Loss = 3.4694e-03, PNorm = 183.3076, GNorm = 0.1466, lr_0 = 1.8076e-04
Loss = 4.4022e-03, PNorm = 183.3104, GNorm = 0.1745, lr_0 = 1.8064e-04
Loss = 7.6939e-03, PNorm = 183.3140, GNorm = 0.3267, lr_0 = 1.8052e-04
Loss = 3.1562e-03, PNorm = 183.3198, GNorm = 0.1411, lr_0 = 1.8039e-04
Loss = 4.8673e-03, PNorm = 183.3239, GNorm = 0.1595, lr_0 = 1.8027e-04
Loss = 6.2656e-03, PNorm = 183.3300, GNorm = 0.1407, lr_0 = 1.8015e-04
Loss = 8.8440e-03, PNorm = 183.3357, GNorm = 0.1126, lr_0 = 1.8002e-04
Loss = 4.4494e-03, PNorm = 183.3401, GNorm = 0.2691, lr_0 = 1.7990e-04
Loss = 5.2314e-03, PNorm = 183.3432, GNorm = 0.1115, lr_0 = 1.7978e-04
Loss = 4.9579e-03, PNorm = 183.3464, GNorm = 0.1828, lr_0 = 1.7965e-04
Loss = 4.6979e-03, PNorm = 183.3496, GNorm = 0.3243, lr_0 = 1.7953e-04
Loss = 5.0795e-03, PNorm = 183.3546, GNorm = 0.0670, lr_0 = 1.7941e-04
Loss = 7.4731e-03, PNorm = 183.3597, GNorm = 0.1028, lr_0 = 1.7928e-04
Loss = 4.8644e-03, PNorm = 183.3638, GNorm = 0.1644, lr_0 = 1.7916e-04
Loss = 2.7799e-03, PNorm = 183.3670, GNorm = 0.1206, lr_0 = 1.7904e-04
Loss = 3.9326e-03, PNorm = 183.3703, GNorm = 0.2905, lr_0 = 1.7892e-04
Loss = 4.1893e-03, PNorm = 183.3748, GNorm = 0.1369, lr_0 = 1.7879e-04
Loss = 4.9504e-03, PNorm = 183.3802, GNorm = 0.2348, lr_0 = 1.7867e-04
Loss = 4.2351e-03, PNorm = 183.3860, GNorm = 0.1334, lr_0 = 1.7855e-04
Loss = 3.1797e-03, PNorm = 183.3906, GNorm = 0.1342, lr_0 = 1.7843e-04
Loss = 6.6701e-03, PNorm = 183.3944, GNorm = 0.7414, lr_0 = 1.7830e-04
Loss = 7.1559e-03, PNorm = 183.3983, GNorm = 0.0929, lr_0 = 1.7818e-04
Loss = 3.9775e-03, PNorm = 183.4015, GNorm = 0.3879, lr_0 = 1.7806e-04
Loss = 2.4960e-03, PNorm = 183.4053, GNorm = 0.1240, lr_0 = 1.7794e-04
Loss = 2.6552e-03, PNorm = 183.4092, GNorm = 0.1502, lr_0 = 1.7782e-04
Validation mae = 0.120716
Epoch 23
Loss = 2.5205e-03, PNorm = 183.4112, GNorm = 0.1217, lr_0 = 1.7769e-04
Loss = 2.7275e-03, PNorm = 183.4126, GNorm = 0.1254, lr_0 = 1.7757e-04
Loss = 3.3799e-03, PNorm = 183.4157, GNorm = 0.1077, lr_0 = 1.7745e-04
Loss = 6.6462e-03, PNorm = 183.4193, GNorm = 0.2759, lr_0 = 1.7733e-04
Loss = 3.0238e-03, PNorm = 183.4236, GNorm = 0.1417, lr_0 = 1.7721e-04
Loss = 2.9623e-03, PNorm = 183.4269, GNorm = 0.1170, lr_0 = 1.7709e-04
Loss = 3.8264e-03, PNorm = 183.4278, GNorm = 0.1982, lr_0 = 1.7696e-04
Loss = 3.6694e-03, PNorm = 183.4306, GNorm = 0.0679, lr_0 = 1.7684e-04
Loss = 3.9236e-03, PNorm = 183.4337, GNorm = 0.0718, lr_0 = 1.7672e-04
Loss = 3.7096e-03, PNorm = 183.4357, GNorm = 0.1270, lr_0 = 1.7660e-04
Loss = 3.9752e-03, PNorm = 183.4381, GNorm = 0.1740, lr_0 = 1.7648e-04
Loss = 4.9134e-03, PNorm = 183.4420, GNorm = 0.1341, lr_0 = 1.7636e-04
Loss = 2.3459e-03, PNorm = 183.4445, GNorm = 0.1489, lr_0 = 1.7624e-04
Loss = 6.1136e-03, PNorm = 183.4478, GNorm = 0.3035, lr_0 = 1.7612e-04
Loss = 1.4080e-02, PNorm = 183.4494, GNorm = 2.6398, lr_0 = 1.7600e-04
Loss = 3.8016e-03, PNorm = 183.4526, GNorm = 0.4071, lr_0 = 1.7588e-04
Loss = 6.0675e-03, PNorm = 183.4568, GNorm = 0.0902, lr_0 = 1.7576e-04
Loss = 3.7251e-03, PNorm = 183.4626, GNorm = 0.3931, lr_0 = 1.7564e-04
Loss = 2.5917e-03, PNorm = 183.4652, GNorm = 0.1932, lr_0 = 1.7552e-04
Loss = 3.8094e-03, PNorm = 183.4662, GNorm = 0.1140, lr_0 = 1.7540e-04
Loss = 5.3931e-03, PNorm = 183.4675, GNorm = 0.1600, lr_0 = 1.7528e-04
Loss = 5.8970e-03, PNorm = 183.4715, GNorm = 0.4130, lr_0 = 1.7516e-04
Loss = 2.5743e-03, PNorm = 183.4743, GNorm = 0.1508, lr_0 = 1.7504e-04
Loss = 6.3975e-03, PNorm = 183.4759, GNorm = 0.1132, lr_0 = 1.7492e-04
Loss = 4.1909e-03, PNorm = 183.4781, GNorm = 0.3607, lr_0 = 1.7480e-04
Loss = 2.9398e-03, PNorm = 183.4801, GNorm = 0.2154, lr_0 = 1.7468e-04
Loss = 4.2223e-03, PNorm = 183.4821, GNorm = 0.1522, lr_0 = 1.7456e-04
Loss = 4.9939e-03, PNorm = 183.4832, GNorm = 0.1610, lr_0 = 1.7444e-04
Loss = 8.0509e-03, PNorm = 183.4852, GNorm = 0.3228, lr_0 = 1.7432e-04
Loss = 2.5856e-03, PNorm = 183.4867, GNorm = 0.0659, lr_0 = 1.7420e-04
Loss = 3.2394e-03, PNorm = 183.4904, GNorm = 0.0851, lr_0 = 1.7408e-04
Loss = 3.6980e-03, PNorm = 183.4936, GNorm = 0.1203, lr_0 = 1.7396e-04
Loss = 3.9003e-03, PNorm = 183.4979, GNorm = 0.1904, lr_0 = 1.7384e-04
Loss = 4.3600e-03, PNorm = 183.5003, GNorm = 0.0948, lr_0 = 1.7372e-04
Loss = 8.1187e-03, PNorm = 183.5016, GNorm = 0.1192, lr_0 = 1.7360e-04
Loss = 2.8319e-03, PNorm = 183.5051, GNorm = 0.1019, lr_0 = 1.7348e-04
Loss = 4.0293e-03, PNorm = 183.5087, GNorm = 0.0860, lr_0 = 1.7336e-04
Loss = 2.0600e-03, PNorm = 183.5107, GNorm = 0.1459, lr_0 = 1.7325e-04
Loss = 6.9406e-03, PNorm = 183.5141, GNorm = 0.1919, lr_0 = 1.7313e-04
Loss = 4.0546e-03, PNorm = 183.5191, GNorm = 0.1405, lr_0 = 1.7301e-04
Loss = 4.3077e-03, PNorm = 183.5248, GNorm = 0.0918, lr_0 = 1.7289e-04
Loss = 3.6555e-03, PNorm = 183.5283, GNorm = 0.2212, lr_0 = 1.7277e-04
Loss = 8.1635e-03, PNorm = 183.5320, GNorm = 0.0854, lr_0 = 1.7265e-04
Loss = 6.1478e-03, PNorm = 183.5332, GNorm = 0.2367, lr_0 = 1.7253e-04
Loss = 2.3676e-03, PNorm = 183.5350, GNorm = 0.0975, lr_0 = 1.7242e-04
Loss = 2.7593e-03, PNorm = 183.5361, GNorm = 0.0942, lr_0 = 1.7230e-04
Loss = 2.5475e-03, PNorm = 183.5387, GNorm = 0.1376, lr_0 = 1.7218e-04
Loss = 8.7466e-03, PNorm = 183.5428, GNorm = 0.0715, lr_0 = 1.7206e-04
Loss = 3.6145e-03, PNorm = 183.5479, GNorm = 0.0698, lr_0 = 1.7194e-04
Loss = 3.0361e-03, PNorm = 183.5516, GNorm = 0.0857, lr_0 = 1.7183e-04
Loss = 4.8212e-03, PNorm = 183.5555, GNorm = 0.1035, lr_0 = 1.7171e-04
Loss = 5.8574e-03, PNorm = 183.5570, GNorm = 0.1576, lr_0 = 1.7159e-04
Loss = 4.6222e-03, PNorm = 183.5570, GNorm = 0.1525, lr_0 = 1.7147e-04
Loss = 9.4422e-03, PNorm = 183.5597, GNorm = 0.1933, lr_0 = 1.7136e-04
Loss = 6.5078e-03, PNorm = 183.5640, GNorm = 0.1080, lr_0 = 1.7124e-04
Loss = 7.0291e-03, PNorm = 183.5680, GNorm = 0.3357, lr_0 = 1.7112e-04
Loss = 3.2242e-03, PNorm = 183.5725, GNorm = 0.1597, lr_0 = 1.7100e-04
Loss = 4.1033e-03, PNorm = 183.5774, GNorm = 0.2430, lr_0 = 1.7089e-04
Loss = 4.1133e-03, PNorm = 183.5802, GNorm = 0.1479, lr_0 = 1.7077e-04
Loss = 4.3301e-03, PNorm = 183.5821, GNorm = 0.2345, lr_0 = 1.7065e-04
Loss = 3.7313e-03, PNorm = 183.5826, GNorm = 0.3183, lr_0 = 1.7054e-04
Loss = 2.1955e-03, PNorm = 183.5848, GNorm = 0.0910, lr_0 = 1.7042e-04
Loss = 5.3537e-03, PNorm = 183.5876, GNorm = 0.0579, lr_0 = 1.7030e-04
Loss = 4.7909e-03, PNorm = 183.5911, GNorm = 0.1883, lr_0 = 1.7019e-04
Loss = 5.1926e-03, PNorm = 183.5943, GNorm = 0.0630, lr_0 = 1.7007e-04
Loss = 5.1521e-03, PNorm = 183.5971, GNorm = 0.2080, lr_0 = 1.6995e-04
Loss = 4.0071e-03, PNorm = 183.5998, GNorm = 0.1367, lr_0 = 1.6984e-04
Loss = 6.0434e-03, PNorm = 183.6019, GNorm = 0.1570, lr_0 = 1.6972e-04
Loss = 5.5398e-03, PNorm = 183.6044, GNorm = 0.2370, lr_0 = 1.6960e-04
Loss = 4.8734e-03, PNorm = 183.6054, GNorm = 0.1274, lr_0 = 1.6949e-04
Loss = 3.9210e-03, PNorm = 183.6101, GNorm = 0.2244, lr_0 = 1.6937e-04
Loss = 2.2763e-03, PNorm = 183.6157, GNorm = 0.1059, lr_0 = 1.6926e-04
Loss = 4.2842e-03, PNorm = 183.6206, GNorm = 0.6984, lr_0 = 1.6914e-04
Loss = 3.1364e-03, PNorm = 183.6236, GNorm = 0.1591, lr_0 = 1.6902e-04
Loss = 3.8673e-03, PNorm = 183.6273, GNorm = 0.1095, lr_0 = 1.6891e-04
Loss = 2.5083e-03, PNorm = 183.6303, GNorm = 0.1463, lr_0 = 1.6879e-04
Loss = 6.0037e-03, PNorm = 183.6319, GNorm = 0.1142, lr_0 = 1.6868e-04
Loss = 2.5578e-03, PNorm = 183.6336, GNorm = 0.0827, lr_0 = 1.6856e-04
Loss = 2.8780e-03, PNorm = 183.6379, GNorm = 0.1332, lr_0 = 1.6845e-04
Loss = 6.7462e-03, PNorm = 183.6428, GNorm = 0.2265, lr_0 = 1.6833e-04
Loss = 3.3062e-03, PNorm = 183.6462, GNorm = 0.1682, lr_0 = 1.6821e-04
Loss = 2.7033e-03, PNorm = 183.6512, GNorm = 0.1152, lr_0 = 1.6810e-04
Loss = 3.0838e-03, PNorm = 183.6567, GNorm = 0.1699, lr_0 = 1.6798e-04
Loss = 6.6521e-03, PNorm = 183.6608, GNorm = 0.3352, lr_0 = 1.6787e-04
Loss = 4.9077e-03, PNorm = 183.6637, GNorm = 0.2932, lr_0 = 1.6775e-04
Loss = 3.1093e-03, PNorm = 183.6661, GNorm = 0.1914, lr_0 = 1.6764e-04
Loss = 2.2562e-03, PNorm = 183.6682, GNorm = 0.0863, lr_0 = 1.6752e-04
Loss = 3.6208e-03, PNorm = 183.6721, GNorm = 0.5900, lr_0 = 1.6741e-04
Loss = 3.3107e-03, PNorm = 183.6750, GNorm = 0.2089, lr_0 = 1.6729e-04
Loss = 5.0867e-03, PNorm = 183.6770, GNorm = 0.3617, lr_0 = 1.6718e-04
Loss = 5.4898e-03, PNorm = 183.6788, GNorm = 0.0975, lr_0 = 1.6707e-04
Loss = 6.6184e-03, PNorm = 183.6804, GNorm = 0.1464, lr_0 = 1.6695e-04
Loss = 5.8038e-03, PNorm = 183.6829, GNorm = 0.4563, lr_0 = 1.6684e-04
Loss = 4.3091e-03, PNorm = 183.6865, GNorm = 0.1510, lr_0 = 1.6672e-04
Loss = 6.3078e-03, PNorm = 183.6904, GNorm = 0.4386, lr_0 = 1.6661e-04
Loss = 8.3431e-03, PNorm = 183.6924, GNorm = 0.2152, lr_0 = 1.6649e-04
Loss = 3.9134e-03, PNorm = 183.6950, GNorm = 0.1298, lr_0 = 1.6638e-04
Loss = 5.1783e-03, PNorm = 183.6998, GNorm = 0.1257, lr_0 = 1.6627e-04
Loss = 4.9810e-03, PNorm = 183.7035, GNorm = 0.1971, lr_0 = 1.6615e-04
Loss = 1.9189e-02, PNorm = 183.7061, GNorm = 0.0984, lr_0 = 1.6604e-04
Loss = 2.7842e-03, PNorm = 183.7082, GNorm = 0.2392, lr_0 = 1.6592e-04
Loss = 6.9735e-03, PNorm = 183.7131, GNorm = 0.4946, lr_0 = 1.6581e-04
Loss = 5.1436e-03, PNorm = 183.7178, GNorm = 0.0822, lr_0 = 1.6570e-04
Loss = 6.1505e-03, PNorm = 183.7223, GNorm = 0.1632, lr_0 = 1.6558e-04
Loss = 5.3143e-03, PNorm = 183.7251, GNorm = 0.0800, lr_0 = 1.6547e-04
Loss = 4.3386e-03, PNorm = 183.7279, GNorm = 0.2698, lr_0 = 1.6536e-04
Loss = 3.5748e-03, PNorm = 183.7291, GNorm = 0.0837, lr_0 = 1.6524e-04
Loss = 3.3572e-03, PNorm = 183.7305, GNorm = 0.1156, lr_0 = 1.6513e-04
Loss = 2.5625e-03, PNorm = 183.7339, GNorm = 0.1094, lr_0 = 1.6502e-04
Loss = 4.3764e-03, PNorm = 183.7368, GNorm = 0.1957, lr_0 = 1.6490e-04
Loss = 3.7787e-03, PNorm = 183.7390, GNorm = 0.0892, lr_0 = 1.6479e-04
Loss = 2.7757e-03, PNorm = 183.7416, GNorm = 0.0942, lr_0 = 1.6468e-04
Loss = 4.0476e-03, PNorm = 183.7458, GNorm = 0.1661, lr_0 = 1.6457e-04
Loss = 3.3722e-03, PNorm = 183.7500, GNorm = 0.1579, lr_0 = 1.6445e-04
Loss = 2.4398e-03, PNorm = 183.7540, GNorm = 0.1359, lr_0 = 1.6434e-04
Loss = 3.5287e-03, PNorm = 183.7579, GNorm = 0.1338, lr_0 = 1.6423e-04
Loss = 4.1386e-03, PNorm = 183.7614, GNorm = 0.3598, lr_0 = 1.6412e-04
Loss = 2.9568e-03, PNorm = 183.7632, GNorm = 0.1165, lr_0 = 1.6400e-04
Loss = 2.5560e-03, PNorm = 183.7662, GNorm = 0.0919, lr_0 = 1.6389e-04
Loss = 6.3123e-03, PNorm = 183.7712, GNorm = 0.1627, lr_0 = 1.6378e-04
Validation mae = 0.120726
Epoch 24
Loss = 2.9743e-03, PNorm = 183.7752, GNorm = 0.1168, lr_0 = 1.6367e-04
Loss = 3.1536e-03, PNorm = 183.7775, GNorm = 0.1653, lr_0 = 1.6355e-04
Loss = 3.1553e-03, PNorm = 183.7790, GNorm = 0.1034, lr_0 = 1.6344e-04
Loss = 2.0419e-03, PNorm = 183.7805, GNorm = 0.1858, lr_0 = 1.6333e-04
Loss = 2.9260e-03, PNorm = 183.7810, GNorm = 0.0728, lr_0 = 1.6322e-04
Loss = 3.5227e-03, PNorm = 183.7818, GNorm = 0.3470, lr_0 = 1.6311e-04
Loss = 3.6765e-03, PNorm = 183.7831, GNorm = 0.0643, lr_0 = 1.6299e-04
Loss = 2.5815e-03, PNorm = 183.7850, GNorm = 0.1668, lr_0 = 1.6288e-04
Loss = 2.2481e-03, PNorm = 183.7884, GNorm = 0.0693, lr_0 = 1.6277e-04
Loss = 2.0072e-03, PNorm = 183.7920, GNorm = 0.0627, lr_0 = 1.6266e-04
Loss = 3.6590e-03, PNorm = 183.7946, GNorm = 0.1181, lr_0 = 1.6255e-04
Loss = 6.0616e-03, PNorm = 183.7974, GNorm = 0.1487, lr_0 = 1.6244e-04
Loss = 3.0651e-03, PNorm = 183.8002, GNorm = 0.0885, lr_0 = 1.6233e-04
Loss = 2.2967e-03, PNorm = 183.8025, GNorm = 0.1231, lr_0 = 1.6221e-04
Loss = 2.8033e-03, PNorm = 183.8036, GNorm = 0.2133, lr_0 = 1.6210e-04
Loss = 4.3084e-03, PNorm = 183.8060, GNorm = 1.3643, lr_0 = 1.6199e-04
Loss = 2.7477e-03, PNorm = 183.8064, GNorm = 0.0916, lr_0 = 1.6188e-04
Loss = 3.7131e-03, PNorm = 183.8084, GNorm = 0.2035, lr_0 = 1.6177e-04
Loss = 4.4341e-03, PNorm = 183.8113, GNorm = 0.1042, lr_0 = 1.6166e-04
Loss = 2.9582e-03, PNorm = 183.8168, GNorm = 0.1508, lr_0 = 1.6155e-04
Loss = 3.1222e-03, PNorm = 183.8200, GNorm = 0.2236, lr_0 = 1.6144e-04
Loss = 2.1140e-03, PNorm = 183.8216, GNorm = 0.0933, lr_0 = 1.6133e-04
Loss = 4.5538e-03, PNorm = 183.8233, GNorm = 0.0947, lr_0 = 1.6122e-04
Loss = 2.6317e-03, PNorm = 183.8268, GNorm = 0.1857, lr_0 = 1.6111e-04
Loss = 2.5085e-03, PNorm = 183.8298, GNorm = 0.1334, lr_0 = 1.6100e-04
Loss = 2.1238e-03, PNorm = 183.8322, GNorm = 0.1517, lr_0 = 1.6089e-04
Loss = 2.8001e-03, PNorm = 183.8342, GNorm = 0.2200, lr_0 = 1.6078e-04
Loss = 2.5426e-03, PNorm = 183.8359, GNorm = 0.0541, lr_0 = 1.6067e-04
Loss = 2.2333e-03, PNorm = 183.8386, GNorm = 0.1180, lr_0 = 1.6056e-04
Loss = 3.0584e-03, PNorm = 183.8399, GNorm = 0.1265, lr_0 = 1.6045e-04
Loss = 5.0523e-03, PNorm = 183.8439, GNorm = 0.0841, lr_0 = 1.6034e-04
Loss = 2.6632e-03, PNorm = 183.8468, GNorm = 0.0820, lr_0 = 1.6023e-04
Loss = 4.1258e-03, PNorm = 183.8496, GNorm = 0.0894, lr_0 = 1.6012e-04
Loss = 4.3744e-03, PNorm = 183.8532, GNorm = 0.1640, lr_0 = 1.6001e-04
Loss = 2.9509e-03, PNorm = 183.8551, GNorm = 0.0868, lr_0 = 1.5990e-04
Loss = 2.5598e-03, PNorm = 183.8558, GNorm = 0.1780, lr_0 = 1.5979e-04
Loss = 4.8157e-03, PNorm = 183.8588, GNorm = 0.0878, lr_0 = 1.5968e-04
Loss = 2.7132e-03, PNorm = 183.8622, GNorm = 0.0504, lr_0 = 1.5957e-04
Loss = 3.9102e-03, PNorm = 183.8644, GNorm = 0.1213, lr_0 = 1.5946e-04
Loss = 4.2488e-03, PNorm = 183.8668, GNorm = 0.1640, lr_0 = 1.5935e-04
Loss = 2.5895e-03, PNorm = 183.8693, GNorm = 0.2229, lr_0 = 1.5924e-04
Loss = 2.5733e-03, PNorm = 183.8718, GNorm = 0.1001, lr_0 = 1.5913e-04
Loss = 3.2919e-03, PNorm = 183.8747, GNorm = 0.0955, lr_0 = 1.5902e-04
Loss = 4.8604e-03, PNorm = 183.8778, GNorm = 0.8467, lr_0 = 1.5891e-04
Loss = 2.9911e-03, PNorm = 183.8790, GNorm = 0.1182, lr_0 = 1.5880e-04
Loss = 2.8429e-03, PNorm = 183.8820, GNorm = 0.1186, lr_0 = 1.5870e-04
Loss = 3.2264e-03, PNorm = 183.8840, GNorm = 0.0751, lr_0 = 1.5859e-04
Loss = 2.5509e-03, PNorm = 183.8861, GNorm = 0.0609, lr_0 = 1.5848e-04
Loss = 4.1957e-03, PNorm = 183.8877, GNorm = 0.1823, lr_0 = 1.5837e-04
Loss = 3.9565e-03, PNorm = 183.8890, GNorm = 0.1593, lr_0 = 1.5826e-04
Loss = 6.3168e-03, PNorm = 183.8907, GNorm = 0.0943, lr_0 = 1.5815e-04
Loss = 3.9380e-03, PNorm = 183.8930, GNorm = 0.2614, lr_0 = 1.5804e-04
Loss = 7.2480e-03, PNorm = 183.8963, GNorm = 0.1189, lr_0 = 1.5794e-04
Loss = 7.6211e-03, PNorm = 183.8987, GNorm = 0.0807, lr_0 = 1.5783e-04
Loss = 2.6859e-03, PNorm = 183.9016, GNorm = 0.0868, lr_0 = 1.5772e-04
Loss = 4.0323e-03, PNorm = 183.9055, GNorm = 0.1211, lr_0 = 1.5761e-04
Loss = 4.6621e-03, PNorm = 183.9101, GNorm = 0.1701, lr_0 = 1.5750e-04
Loss = 2.1236e-03, PNorm = 183.9126, GNorm = 0.1007, lr_0 = 1.5740e-04
Loss = 2.5916e-03, PNorm = 183.9156, GNorm = 0.0634, lr_0 = 1.5729e-04
Loss = 2.0980e-03, PNorm = 183.9177, GNorm = 0.0677, lr_0 = 1.5718e-04
Loss = 5.4275e-03, PNorm = 183.9206, GNorm = 0.2886, lr_0 = 1.5707e-04
Loss = 3.0235e-03, PNorm = 183.9238, GNorm = 0.1212, lr_0 = 1.5697e-04
Loss = 5.3212e-03, PNorm = 183.9267, GNorm = 0.1154, lr_0 = 1.5686e-04
Loss = 4.2786e-03, PNorm = 183.9291, GNorm = 0.1298, lr_0 = 1.5675e-04
Loss = 3.1741e-03, PNorm = 183.9309, GNorm = 0.1056, lr_0 = 1.5664e-04
Loss = 2.3665e-03, PNorm = 183.9344, GNorm = 0.0936, lr_0 = 1.5654e-04
Loss = 2.0856e-03, PNorm = 183.9389, GNorm = 0.1257, lr_0 = 1.5643e-04
Loss = 2.9556e-03, PNorm = 183.9425, GNorm = 0.1709, lr_0 = 1.5632e-04
Loss = 2.5165e-03, PNorm = 183.9446, GNorm = 0.1598, lr_0 = 1.5621e-04
Loss = 3.4252e-03, PNorm = 183.9464, GNorm = 0.1661, lr_0 = 1.5611e-04
Loss = 3.4140e-03, PNorm = 183.9483, GNorm = 0.0930, lr_0 = 1.5600e-04
Loss = 2.2431e-03, PNorm = 183.9502, GNorm = 0.1685, lr_0 = 1.5589e-04
Loss = 9.4495e-03, PNorm = 183.9528, GNorm = 0.1985, lr_0 = 1.5579e-04
Loss = 3.2595e-03, PNorm = 183.9570, GNorm = 0.1454, lr_0 = 1.5568e-04
Loss = 4.7651e-03, PNorm = 183.9602, GNorm = 0.0873, lr_0 = 1.5557e-04
Loss = 2.2641e-03, PNorm = 183.9622, GNorm = 0.3290, lr_0 = 1.5547e-04
Loss = 2.4152e-03, PNorm = 183.9636, GNorm = 0.2083, lr_0 = 1.5536e-04
Loss = 5.1911e-03, PNorm = 183.9661, GNorm = 0.2409, lr_0 = 1.5525e-04
Loss = 3.5087e-03, PNorm = 183.9689, GNorm = 0.1198, lr_0 = 1.5515e-04
Loss = 8.5821e-03, PNorm = 183.9720, GNorm = 0.1618, lr_0 = 1.5504e-04
Loss = 2.7092e-03, PNorm = 183.9744, GNorm = 0.0920, lr_0 = 1.5493e-04
Loss = 5.4163e-03, PNorm = 183.9775, GNorm = 0.1053, lr_0 = 1.5483e-04
Loss = 5.6268e-03, PNorm = 183.9810, GNorm = 0.2193, lr_0 = 1.5472e-04
Loss = 2.4210e-03, PNorm = 183.9831, GNorm = 0.0768, lr_0 = 1.5462e-04
Loss = 8.6170e-03, PNorm = 183.9858, GNorm = 0.1585, lr_0 = 1.5451e-04
Loss = 5.3732e-03, PNorm = 183.9894, GNorm = 0.0786, lr_0 = 1.5440e-04
Loss = 4.1538e-03, PNorm = 183.9936, GNorm = 0.0868, lr_0 = 1.5430e-04
Loss = 2.2384e-03, PNorm = 183.9976, GNorm = 0.1652, lr_0 = 1.5419e-04
Loss = 6.9095e-03, PNorm = 184.0021, GNorm = 0.0880, lr_0 = 1.5409e-04
Loss = 6.0338e-03, PNorm = 184.0079, GNorm = 0.2382, lr_0 = 1.5398e-04
Loss = 4.8193e-03, PNorm = 184.0099, GNorm = 0.1380, lr_0 = 1.5388e-04
Loss = 3.7990e-03, PNorm = 184.0117, GNorm = 0.1331, lr_0 = 1.5377e-04
Loss = 9.5345e-03, PNorm = 184.0138, GNorm = 0.1261, lr_0 = 1.5367e-04
Loss = 7.5109e-03, PNorm = 184.0172, GNorm = 0.1921, lr_0 = 1.5356e-04
Loss = 3.1072e-03, PNorm = 184.0199, GNorm = 0.1799, lr_0 = 1.5346e-04
Loss = 7.0672e-03, PNorm = 184.0221, GNorm = 0.3193, lr_0 = 1.5335e-04
Loss = 3.1539e-03, PNorm = 184.0252, GNorm = 0.0882, lr_0 = 1.5325e-04
Loss = 6.7431e-03, PNorm = 184.0272, GNorm = 0.0946, lr_0 = 1.5314e-04
Loss = 3.0009e-03, PNorm = 184.0314, GNorm = 0.1183, lr_0 = 1.5304e-04
Loss = 5.5349e-03, PNorm = 184.0345, GNorm = 0.0564, lr_0 = 1.5293e-04
Loss = 6.3695e-03, PNorm = 184.0378, GNorm = 0.1092, lr_0 = 1.5283e-04
Loss = 3.5333e-03, PNorm = 184.0415, GNorm = 0.3262, lr_0 = 1.5272e-04
Loss = 3.0146e-03, PNorm = 184.0470, GNorm = 0.1920, lr_0 = 1.5262e-04
Loss = 4.3108e-03, PNorm = 184.0504, GNorm = 0.1509, lr_0 = 1.5251e-04
Loss = 9.2696e-03, PNorm = 184.0540, GNorm = 0.2040, lr_0 = 1.5241e-04
Loss = 4.1503e-03, PNorm = 184.0561, GNorm = 0.1594, lr_0 = 1.5230e-04
Loss = 3.5039e-03, PNorm = 184.0617, GNorm = 0.2592, lr_0 = 1.5220e-04
Loss = 4.0559e-03, PNorm = 184.0664, GNorm = 0.2518, lr_0 = 1.5209e-04
Loss = 4.6559e-03, PNorm = 184.0675, GNorm = 0.0694, lr_0 = 1.5199e-04
Loss = 2.5349e-03, PNorm = 184.0693, GNorm = 0.2116, lr_0 = 1.5189e-04
Loss = 3.2187e-03, PNorm = 184.0717, GNorm = 0.1182, lr_0 = 1.5178e-04
Loss = 3.9659e-03, PNorm = 184.0739, GNorm = 0.2122, lr_0 = 1.5168e-04
Loss = 6.9117e-03, PNorm = 184.0776, GNorm = 0.0850, lr_0 = 1.5157e-04
Loss = 1.4453e-02, PNorm = 184.0813, GNorm = 0.3409, lr_0 = 1.5147e-04
Loss = 5.4547e-03, PNorm = 184.0818, GNorm = 0.2003, lr_0 = 1.5137e-04
Loss = 5.7116e-03, PNorm = 184.0823, GNorm = 0.1730, lr_0 = 1.5126e-04
Loss = 7.9220e-03, PNorm = 184.0860, GNorm = 0.0847, lr_0 = 1.5116e-04
Loss = 3.3951e-03, PNorm = 184.0892, GNorm = 0.0998, lr_0 = 1.5106e-04
Loss = 6.4856e-03, PNorm = 184.0925, GNorm = 0.2011, lr_0 = 1.5095e-04
Loss = 6.3887e-03, PNorm = 184.0952, GNorm = 0.1204, lr_0 = 1.5085e-04
Validation mae = 0.120792
Epoch 25
Loss = 3.1540e-03, PNorm = 184.0962, GNorm = 0.0654, lr_0 = 1.5075e-04
Loss = 3.7372e-03, PNorm = 184.0985, GNorm = 0.2462, lr_0 = 1.5064e-04
Loss = 2.0340e-03, PNorm = 184.0997, GNorm = 0.1371, lr_0 = 1.5054e-04
Loss = 3.2993e-03, PNorm = 184.1029, GNorm = 0.1886, lr_0 = 1.5044e-04
Loss = 3.4721e-03, PNorm = 184.1057, GNorm = 0.2142, lr_0 = 1.5033e-04
Loss = 2.2731e-03, PNorm = 184.1083, GNorm = 0.0716, lr_0 = 1.5023e-04
Loss = 2.4092e-03, PNorm = 184.1108, GNorm = 0.1244, lr_0 = 1.5013e-04
Loss = 3.8711e-03, PNorm = 184.1126, GNorm = 0.2869, lr_0 = 1.5002e-04
Loss = 3.6924e-03, PNorm = 184.1133, GNorm = 0.1297, lr_0 = 1.4992e-04
Loss = 2.0936e-03, PNorm = 184.1170, GNorm = 0.0700, lr_0 = 1.4982e-04
Loss = 4.0640e-03, PNorm = 184.1197, GNorm = 0.2544, lr_0 = 1.4972e-04
Loss = 2.8316e-03, PNorm = 184.1220, GNorm = 0.2358, lr_0 = 1.4961e-04
Loss = 3.4968e-03, PNorm = 184.1258, GNorm = 0.1652, lr_0 = 1.4951e-04
Loss = 2.4855e-03, PNorm = 184.1281, GNorm = 0.1422, lr_0 = 1.4941e-04
Loss = 2.5733e-03, PNorm = 184.1310, GNorm = 0.1434, lr_0 = 1.4931e-04
Loss = 1.7864e-03, PNorm = 184.1332, GNorm = 0.1449, lr_0 = 1.4920e-04
Loss = 4.7035e-03, PNorm = 184.1344, GNorm = 0.1551, lr_0 = 1.4910e-04
Loss = 4.1668e-03, PNorm = 184.1365, GNorm = 0.2290, lr_0 = 1.4900e-04
Loss = 3.3408e-03, PNorm = 184.1386, GNorm = 0.1661, lr_0 = 1.4890e-04
Loss = 4.3689e-03, PNorm = 184.1401, GNorm = 0.0909, lr_0 = 1.4880e-04
Loss = 2.4984e-03, PNorm = 184.1410, GNorm = 0.0473, lr_0 = 1.4869e-04
Loss = 2.2149e-03, PNorm = 184.1428, GNorm = 0.0842, lr_0 = 1.4859e-04
Loss = 7.2802e-03, PNorm = 184.1455, GNorm = 0.2616, lr_0 = 1.4849e-04
Loss = 5.5238e-03, PNorm = 184.1469, GNorm = 0.1707, lr_0 = 1.4839e-04
Loss = 5.4629e-03, PNorm = 184.1494, GNorm = 0.1726, lr_0 = 1.4829e-04
Loss = 2.6744e-03, PNorm = 184.1520, GNorm = 0.1265, lr_0 = 1.4818e-04
Loss = 2.3568e-03, PNorm = 184.1530, GNorm = 0.3603, lr_0 = 1.4808e-04
Loss = 4.9824e-03, PNorm = 184.1545, GNorm = 0.0723, lr_0 = 1.4798e-04
Loss = 2.6576e-03, PNorm = 184.1574, GNorm = 0.0628, lr_0 = 1.4788e-04
Loss = 8.9573e-03, PNorm = 184.1596, GNorm = 0.4821, lr_0 = 1.4778e-04
Loss = 3.1517e-03, PNorm = 184.1625, GNorm = 0.1564, lr_0 = 1.4768e-04
Loss = 6.8003e-03, PNorm = 184.1672, GNorm = 0.1668, lr_0 = 1.4758e-04
Loss = 3.7086e-03, PNorm = 184.1701, GNorm = 0.1332, lr_0 = 1.4748e-04
Loss = 3.4685e-03, PNorm = 184.1714, GNorm = 0.0644, lr_0 = 1.4737e-04
Loss = 2.3245e-03, PNorm = 184.1731, GNorm = 0.0656, lr_0 = 1.4727e-04
Loss = 4.0182e-03, PNorm = 184.1747, GNorm = 0.1056, lr_0 = 1.4717e-04
Loss = 2.5634e-03, PNorm = 184.1764, GNorm = 0.1762, lr_0 = 1.4707e-04
Loss = 3.1498e-03, PNorm = 184.1795, GNorm = 0.2175, lr_0 = 1.4697e-04
Loss = 2.4210e-03, PNorm = 184.1823, GNorm = 0.0764, lr_0 = 1.4687e-04
Loss = 8.5604e-03, PNorm = 184.1850, GNorm = 0.0832, lr_0 = 1.4677e-04
Loss = 4.5284e-03, PNorm = 184.1866, GNorm = 0.0755, lr_0 = 1.4667e-04
Loss = 2.9556e-03, PNorm = 184.1882, GNorm = 0.1306, lr_0 = 1.4657e-04
Loss = 2.6815e-03, PNorm = 184.1904, GNorm = 0.1680, lr_0 = 1.4647e-04
Loss = 2.7265e-03, PNorm = 184.1934, GNorm = 0.5027, lr_0 = 1.4637e-04
Loss = 6.0743e-03, PNorm = 184.1938, GNorm = 0.0989, lr_0 = 1.4627e-04
Loss = 9.2861e-03, PNorm = 184.1949, GNorm = 0.1188, lr_0 = 1.4617e-04
Loss = 6.8502e-03, PNorm = 184.1969, GNorm = 0.0644, lr_0 = 1.4607e-04
Loss = 3.0788e-03, PNorm = 184.1997, GNorm = 0.0636, lr_0 = 1.4597e-04
Loss = 2.2435e-03, PNorm = 184.2020, GNorm = 0.0735, lr_0 = 1.4587e-04
Loss = 4.6036e-03, PNorm = 184.2044, GNorm = 0.2172, lr_0 = 1.4577e-04
Loss = 2.6135e-03, PNorm = 184.2062, GNorm = 0.0817, lr_0 = 1.4567e-04
Loss = 2.2818e-03, PNorm = 184.2091, GNorm = 0.0915, lr_0 = 1.4557e-04
Loss = 3.1576e-03, PNorm = 184.2121, GNorm = 0.1325, lr_0 = 1.4547e-04
Loss = 5.8507e-03, PNorm = 184.2159, GNorm = 0.1704, lr_0 = 1.4537e-04
Loss = 2.9033e-03, PNorm = 184.2183, GNorm = 0.0910, lr_0 = 1.4527e-04
Loss = 3.7502e-03, PNorm = 184.2212, GNorm = 0.0650, lr_0 = 1.4517e-04
Loss = 3.1480e-03, PNorm = 184.2244, GNorm = 0.0682, lr_0 = 1.4507e-04
Loss = 2.0768e-03, PNorm = 184.2265, GNorm = 0.1784, lr_0 = 1.4497e-04
Loss = 6.8954e-03, PNorm = 184.2282, GNorm = 0.1875, lr_0 = 1.4487e-04
Loss = 1.8789e-03, PNorm = 184.2305, GNorm = 0.1036, lr_0 = 1.4477e-04
Loss = 2.7162e-03, PNorm = 184.2331, GNorm = 0.1743, lr_0 = 1.4467e-04
Loss = 3.0921e-03, PNorm = 184.2357, GNorm = 0.0678, lr_0 = 1.4457e-04
Loss = 6.4022e-03, PNorm = 184.2378, GNorm = 0.0865, lr_0 = 1.4447e-04
Loss = 2.3222e-03, PNorm = 184.2413, GNorm = 0.0726, lr_0 = 1.4438e-04
Loss = 3.6926e-03, PNorm = 184.2421, GNorm = 0.1794, lr_0 = 1.4428e-04
Loss = 3.0131e-03, PNorm = 184.2455, GNorm = 0.1057, lr_0 = 1.4418e-04
Loss = 3.2125e-03, PNorm = 184.2493, GNorm = 0.0723, lr_0 = 1.4408e-04
Loss = 2.7186e-03, PNorm = 184.2523, GNorm = 0.0701, lr_0 = 1.4398e-04
Loss = 4.3334e-03, PNorm = 184.2547, GNorm = 0.0984, lr_0 = 1.4388e-04
Loss = 2.9814e-03, PNorm = 184.2567, GNorm = 0.0636, lr_0 = 1.4378e-04
Loss = 7.9180e-03, PNorm = 184.2592, GNorm = 0.0838, lr_0 = 1.4368e-04
Loss = 1.6095e-03, PNorm = 184.2614, GNorm = 0.0653, lr_0 = 1.4359e-04
Loss = 3.6878e-03, PNorm = 184.2638, GNorm = 0.1193, lr_0 = 1.4349e-04
Loss = 2.7492e-03, PNorm = 184.2666, GNorm = 0.1070, lr_0 = 1.4339e-04
Loss = 4.5303e-03, PNorm = 184.2690, GNorm = 0.0856, lr_0 = 1.4329e-04
Loss = 3.8207e-03, PNorm = 184.2704, GNorm = 0.1111, lr_0 = 1.4319e-04
Loss = 6.0713e-03, PNorm = 184.2729, GNorm = 0.1030, lr_0 = 1.4310e-04
Loss = 4.6616e-03, PNorm = 184.2737, GNorm = 0.5103, lr_0 = 1.4300e-04
Loss = 2.8420e-03, PNorm = 184.2768, GNorm = 0.0770, lr_0 = 1.4290e-04
Loss = 1.9434e-03, PNorm = 184.2798, GNorm = 0.0654, lr_0 = 1.4280e-04
Loss = 6.4767e-03, PNorm = 184.2815, GNorm = 0.1443, lr_0 = 1.4270e-04
Loss = 8.1425e-03, PNorm = 184.2852, GNorm = 0.1079, lr_0 = 1.4261e-04
Loss = 2.9402e-03, PNorm = 184.2896, GNorm = 0.1065, lr_0 = 1.4251e-04
Loss = 1.5649e-03, PNorm = 184.2934, GNorm = 0.1263, lr_0 = 1.4241e-04
Loss = 5.5036e-03, PNorm = 184.2983, GNorm = 0.2174, lr_0 = 1.4231e-04
Loss = 2.9538e-03, PNorm = 184.3028, GNorm = 0.1234, lr_0 = 1.4222e-04
Loss = 3.7390e-03, PNorm = 184.3048, GNorm = 0.1765, lr_0 = 1.4212e-04
Loss = 4.0796e-03, PNorm = 184.3071, GNorm = 0.0836, lr_0 = 1.4202e-04
Loss = 2.5525e-03, PNorm = 184.3083, GNorm = 0.1155, lr_0 = 1.4192e-04
Loss = 1.9899e-03, PNorm = 184.3101, GNorm = 0.1102, lr_0 = 1.4183e-04
Loss = 5.9514e-03, PNorm = 184.3128, GNorm = 0.1603, lr_0 = 1.4173e-04
Loss = 1.9546e-03, PNorm = 184.3143, GNorm = 0.1944, lr_0 = 1.4163e-04
Loss = 5.7370e-03, PNorm = 184.3160, GNorm = 0.0786, lr_0 = 1.4153e-04
Loss = 3.5121e-03, PNorm = 184.3174, GNorm = 0.0925, lr_0 = 1.4144e-04
Loss = 7.2392e-03, PNorm = 184.3182, GNorm = 0.1799, lr_0 = 1.4134e-04
Loss = 3.1800e-03, PNorm = 184.3191, GNorm = 0.0537, lr_0 = 1.4124e-04
Loss = 2.1391e-03, PNorm = 184.3185, GNorm = 0.0848, lr_0 = 1.4115e-04
Loss = 1.7920e-03, PNorm = 184.3200, GNorm = 0.0768, lr_0 = 1.4105e-04
Loss = 1.5999e-03, PNorm = 184.3215, GNorm = 0.0677, lr_0 = 1.4095e-04
Loss = 2.8388e-03, PNorm = 184.3232, GNorm = 0.0507, lr_0 = 1.4086e-04
Loss = 2.5859e-03, PNorm = 184.3255, GNorm = 0.2091, lr_0 = 1.4076e-04
Loss = 4.6587e-03, PNorm = 184.3278, GNorm = 0.0703, lr_0 = 1.4066e-04
Loss = 3.3970e-03, PNorm = 184.3302, GNorm = 0.0980, lr_0 = 1.4057e-04
Loss = 3.1065e-03, PNorm = 184.3313, GNorm = 0.0606, lr_0 = 1.4047e-04
Loss = 3.1417e-03, PNorm = 184.3327, GNorm = 0.0727, lr_0 = 1.4038e-04
Loss = 4.6949e-03, PNorm = 184.3354, GNorm = 0.1075, lr_0 = 1.4028e-04
Loss = 4.4536e-03, PNorm = 184.3376, GNorm = 0.3705, lr_0 = 1.4018e-04
Loss = 1.9327e-03, PNorm = 184.3389, GNorm = 0.2629, lr_0 = 1.4009e-04
Loss = 4.3728e-03, PNorm = 184.3416, GNorm = 0.0927, lr_0 = 1.3999e-04
Loss = 3.6285e-03, PNorm = 184.3432, GNorm = 0.0823, lr_0 = 1.3990e-04
Loss = 3.4858e-03, PNorm = 184.3442, GNorm = 0.0977, lr_0 = 1.3980e-04
Loss = 2.2680e-02, PNorm = 184.3481, GNorm = 0.2720, lr_0 = 1.3970e-04
Loss = 3.4915e-03, PNorm = 184.3493, GNorm = 0.1611, lr_0 = 1.3961e-04
Loss = 3.5976e-03, PNorm = 184.3509, GNorm = 0.1540, lr_0 = 1.3951e-04
Loss = 1.7627e-03, PNorm = 184.3531, GNorm = 0.0780, lr_0 = 1.3942e-04
Loss = 3.9555e-03, PNorm = 184.3543, GNorm = 0.1504, lr_0 = 1.3932e-04
Loss = 2.1708e-03, PNorm = 184.3557, GNorm = 0.0767, lr_0 = 1.3923e-04
Loss = 2.0839e-03, PNorm = 184.3583, GNorm = 0.1272, lr_0 = 1.3913e-04
Loss = 4.0620e-03, PNorm = 184.3597, GNorm = 0.0487, lr_0 = 1.3904e-04
Loss = 5.0985e-03, PNorm = 184.3619, GNorm = 0.0833, lr_0 = 1.3894e-04
Validation mae = 0.120769
Epoch 26
Loss = 2.2562e-03, PNorm = 184.3646, GNorm = 0.1019, lr_0 = 1.3884e-04
Loss = 2.3778e-03, PNorm = 184.3677, GNorm = 0.0772, lr_0 = 1.3875e-04
Loss = 6.4553e-03, PNorm = 184.3694, GNorm = 0.1733, lr_0 = 1.3865e-04
Loss = 1.7662e-03, PNorm = 184.3716, GNorm = 0.0493, lr_0 = 1.3856e-04
Loss = 6.4980e-03, PNorm = 184.3739, GNorm = 0.1201, lr_0 = 1.3846e-04
Loss = 3.8580e-03, PNorm = 184.3761, GNorm = 0.0962, lr_0 = 1.3837e-04
Loss = 3.8011e-03, PNorm = 184.3783, GNorm = 0.1537, lr_0 = 1.3828e-04
Loss = 3.6037e-03, PNorm = 184.3794, GNorm = 0.0759, lr_0 = 1.3818e-04
Loss = 5.1914e-03, PNorm = 184.3794, GNorm = 0.1863, lr_0 = 1.3809e-04
Loss = 3.3289e-03, PNorm = 184.3793, GNorm = 0.0864, lr_0 = 1.3799e-04
Loss = 4.8452e-03, PNorm = 184.3819, GNorm = 0.1101, lr_0 = 1.3790e-04
Loss = 6.3038e-03, PNorm = 184.3844, GNorm = 0.0698, lr_0 = 1.3780e-04
Loss = 4.2134e-03, PNorm = 184.3864, GNorm = 0.0775, lr_0 = 1.3771e-04
Loss = 1.2969e-03, PNorm = 184.3881, GNorm = 0.0884, lr_0 = 1.3761e-04
Loss = 1.9301e-03, PNorm = 184.3895, GNorm = 0.0929, lr_0 = 1.3752e-04
Loss = 1.8678e-03, PNorm = 184.3907, GNorm = 0.1117, lr_0 = 1.3742e-04
Loss = 2.1517e-03, PNorm = 184.3918, GNorm = 0.1466, lr_0 = 1.3733e-04
Loss = 4.4371e-03, PNorm = 184.3944, GNorm = 0.1100, lr_0 = 1.3724e-04
Loss = 3.2943e-03, PNorm = 184.3979, GNorm = 0.4584, lr_0 = 1.3714e-04
Loss = 2.2912e-03, PNorm = 184.3997, GNorm = 0.1615, lr_0 = 1.3705e-04
Loss = 3.0554e-03, PNorm = 184.4000, GNorm = 0.0979, lr_0 = 1.3695e-04
Loss = 1.9128e-03, PNorm = 184.4006, GNorm = 0.0983, lr_0 = 1.3686e-04
Loss = 1.8769e-03, PNorm = 184.4027, GNorm = 0.1141, lr_0 = 1.3677e-04
Loss = 1.5816e-03, PNorm = 184.4045, GNorm = 0.1006, lr_0 = 1.3667e-04
Loss = 3.7692e-03, PNorm = 184.4061, GNorm = 0.1233, lr_0 = 1.3658e-04
Loss = 3.2659e-03, PNorm = 184.4079, GNorm = 0.1109, lr_0 = 1.3649e-04
Loss = 2.7997e-03, PNorm = 184.4100, GNorm = 0.0719, lr_0 = 1.3639e-04
Loss = 5.3687e-03, PNorm = 184.4121, GNorm = 0.0677, lr_0 = 1.3630e-04
Loss = 2.2124e-03, PNorm = 184.4134, GNorm = 0.0698, lr_0 = 1.3621e-04
Loss = 1.9031e-03, PNorm = 184.4158, GNorm = 0.1575, lr_0 = 1.3611e-04
Loss = 1.0030e-02, PNorm = 184.4191, GNorm = 0.1183, lr_0 = 1.3602e-04
Loss = 9.4107e-03, PNorm = 184.4190, GNorm = 0.1554, lr_0 = 1.3593e-04
Loss = 2.9831e-03, PNorm = 184.4213, GNorm = 0.2276, lr_0 = 1.3583e-04
Loss = 1.5302e-03, PNorm = 184.4235, GNorm = 0.0992, lr_0 = 1.3574e-04
Loss = 1.6936e-03, PNorm = 184.4248, GNorm = 0.0777, lr_0 = 1.3565e-04
Loss = 2.2300e-03, PNorm = 184.4272, GNorm = 0.0611, lr_0 = 1.3555e-04
Loss = 3.4995e-03, PNorm = 184.4292, GNorm = 0.1818, lr_0 = 1.3546e-04
Loss = 2.0168e-03, PNorm = 184.4308, GNorm = 0.1025, lr_0 = 1.3537e-04
Loss = 2.8621e-03, PNorm = 184.4331, GNorm = 0.0550, lr_0 = 1.3528e-04
Loss = 1.3767e-03, PNorm = 184.4350, GNorm = 0.0822, lr_0 = 1.3518e-04
Loss = 2.4513e-03, PNorm = 184.4371, GNorm = 0.0842, lr_0 = 1.3509e-04
Loss = 4.4400e-03, PNorm = 184.4410, GNorm = 0.2707, lr_0 = 1.3500e-04
Loss = 2.0675e-03, PNorm = 184.4433, GNorm = 0.0759, lr_0 = 1.3491e-04
Loss = 2.1898e-03, PNorm = 184.4447, GNorm = 0.1962, lr_0 = 1.3481e-04
Loss = 1.4576e-03, PNorm = 184.4462, GNorm = 0.1469, lr_0 = 1.3472e-04
Loss = 3.0911e-03, PNorm = 184.4482, GNorm = 0.1031, lr_0 = 1.3463e-04
Loss = 2.4849e-03, PNorm = 184.4491, GNorm = 0.0994, lr_0 = 1.3454e-04
Loss = 4.7626e-03, PNorm = 184.4503, GNorm = 0.0919, lr_0 = 1.3444e-04
Loss = 7.7930e-03, PNorm = 184.4526, GNorm = 0.1212, lr_0 = 1.3435e-04
Loss = 1.6810e-03, PNorm = 184.4545, GNorm = 0.1915, lr_0 = 1.3426e-04
Loss = 2.4263e-03, PNorm = 184.4564, GNorm = 0.0753, lr_0 = 1.3417e-04
Loss = 2.8104e-03, PNorm = 184.4576, GNorm = 0.0678, lr_0 = 1.3408e-04
Loss = 6.9658e-03, PNorm = 184.4596, GNorm = 0.1653, lr_0 = 1.3398e-04
Loss = 1.6512e-03, PNorm = 184.4615, GNorm = 0.0660, lr_0 = 1.3389e-04
Loss = 4.3119e-03, PNorm = 184.4628, GNorm = 0.0639, lr_0 = 1.3380e-04
Loss = 2.8253e-03, PNorm = 184.4646, GNorm = 0.2388, lr_0 = 1.3371e-04
Loss = 3.1200e-03, PNorm = 184.4662, GNorm = 0.0902, lr_0 = 1.3362e-04
Loss = 4.0417e-03, PNorm = 184.4678, GNorm = 0.0982, lr_0 = 1.3353e-04
Loss = 4.0402e-03, PNorm = 184.4696, GNorm = 0.6534, lr_0 = 1.3343e-04
Loss = 6.0113e-03, PNorm = 184.4719, GNorm = 0.1547, lr_0 = 1.3334e-04
Loss = 2.6941e-03, PNorm = 184.4741, GNorm = 0.0836, lr_0 = 1.3325e-04
Loss = 3.7003e-03, PNorm = 184.4760, GNorm = 0.0953, lr_0 = 1.3316e-04
Loss = 1.9534e-03, PNorm = 184.4779, GNorm = 0.0614, lr_0 = 1.3307e-04
Loss = 3.4175e-03, PNorm = 184.4800, GNorm = 0.1355, lr_0 = 1.3298e-04
Loss = 3.8103e-03, PNorm = 184.4817, GNorm = 0.0913, lr_0 = 1.3289e-04
Loss = 3.4279e-03, PNorm = 184.4825, GNorm = 0.1610, lr_0 = 1.3280e-04
Loss = 1.7818e-03, PNorm = 184.4846, GNorm = 0.2035, lr_0 = 1.3270e-04
Loss = 1.9324e-03, PNorm = 184.4864, GNorm = 0.0745, lr_0 = 1.3261e-04
Loss = 4.4667e-03, PNorm = 184.4881, GNorm = 0.0554, lr_0 = 1.3252e-04
Loss = 4.7948e-03, PNorm = 184.4889, GNorm = 0.1565, lr_0 = 1.3243e-04
Loss = 3.5927e-03, PNorm = 184.4906, GNorm = 0.1777, lr_0 = 1.3234e-04
Loss = 6.6924e-03, PNorm = 184.4929, GNorm = 0.0673, lr_0 = 1.3225e-04
Loss = 3.4781e-03, PNorm = 184.4949, GNorm = 0.0744, lr_0 = 1.3216e-04
Loss = 4.3142e-03, PNorm = 184.4967, GNorm = 0.2459, lr_0 = 1.3207e-04
Loss = 1.1377e-02, PNorm = 184.5000, GNorm = 0.0665, lr_0 = 1.3198e-04
Loss = 4.1624e-03, PNorm = 184.5033, GNorm = 0.1145, lr_0 = 1.3189e-04
Loss = 4.1988e-03, PNorm = 184.5062, GNorm = 0.0832, lr_0 = 1.3180e-04
Loss = 1.5357e-03, PNorm = 184.5084, GNorm = 0.0996, lr_0 = 1.3171e-04
Loss = 2.3632e-03, PNorm = 184.5102, GNorm = 0.2316, lr_0 = 1.3162e-04
Loss = 1.7805e-03, PNorm = 184.5113, GNorm = 0.0517, lr_0 = 1.3153e-04
Loss = 2.4228e-03, PNorm = 184.5126, GNorm = 0.1071, lr_0 = 1.3144e-04
Loss = 7.1677e-03, PNorm = 184.5137, GNorm = 0.1983, lr_0 = 1.3135e-04
Loss = 2.5042e-03, PNorm = 184.5162, GNorm = 0.1245, lr_0 = 1.3126e-04
Loss = 6.4598e-03, PNorm = 184.5170, GNorm = 0.1312, lr_0 = 1.3117e-04
Loss = 2.2999e-03, PNorm = 184.5181, GNorm = 0.0417, lr_0 = 1.3108e-04
Loss = 3.1443e-03, PNorm = 184.5201, GNorm = 0.1863, lr_0 = 1.3099e-04
Loss = 6.0073e-03, PNorm = 184.5218, GNorm = 0.1041, lr_0 = 1.3090e-04
Loss = 1.8254e-03, PNorm = 184.5242, GNorm = 0.0588, lr_0 = 1.3081e-04
Loss = 5.6665e-03, PNorm = 184.5244, GNorm = 0.0960, lr_0 = 1.3072e-04
Loss = 1.6683e-03, PNorm = 184.5266, GNorm = 0.1097, lr_0 = 1.3063e-04
Loss = 5.3849e-03, PNorm = 184.5274, GNorm = 0.1703, lr_0 = 1.3054e-04
Loss = 4.2446e-03, PNorm = 184.5308, GNorm = 0.2610, lr_0 = 1.3045e-04
Loss = 1.6081e-03, PNorm = 184.5326, GNorm = 0.2545, lr_0 = 1.3036e-04
Loss = 5.4266e-03, PNorm = 184.5353, GNorm = 0.0521, lr_0 = 1.3027e-04
Loss = 3.1282e-03, PNorm = 184.5370, GNorm = 0.0571, lr_0 = 1.3018e-04
Loss = 1.6331e-03, PNorm = 184.5377, GNorm = 0.1206, lr_0 = 1.3009e-04
Loss = 2.5391e-03, PNorm = 184.5387, GNorm = 0.0848, lr_0 = 1.3000e-04
Loss = 3.7215e-03, PNorm = 184.5400, GNorm = 0.1793, lr_0 = 1.2992e-04
Loss = 5.2993e-03, PNorm = 184.5413, GNorm = 0.1664, lr_0 = 1.2983e-04
Loss = 1.5004e-03, PNorm = 184.5435, GNorm = 0.0659, lr_0 = 1.2974e-04
Loss = 2.5067e-03, PNorm = 184.5450, GNorm = 0.0597, lr_0 = 1.2965e-04
Loss = 3.3652e-03, PNorm = 184.5462, GNorm = 0.1067, lr_0 = 1.2956e-04
Loss = 5.1643e-03, PNorm = 184.5475, GNorm = 0.1475, lr_0 = 1.2947e-04
Loss = 2.3152e-03, PNorm = 184.5489, GNorm = 0.1062, lr_0 = 1.2938e-04
Loss = 3.9921e-03, PNorm = 184.5516, GNorm = 0.1478, lr_0 = 1.2929e-04
Loss = 2.3833e-03, PNorm = 184.5530, GNorm = 0.1474, lr_0 = 1.2921e-04
Loss = 4.6265e-03, PNorm = 184.5535, GNorm = 0.0563, lr_0 = 1.2912e-04
Loss = 2.2496e-03, PNorm = 184.5541, GNorm = 0.1047, lr_0 = 1.2903e-04
Loss = 1.3211e-03, PNorm = 184.5553, GNorm = 0.0570, lr_0 = 1.2894e-04
Loss = 5.4027e-03, PNorm = 184.5558, GNorm = 0.2776, lr_0 = 1.2885e-04
Loss = 2.0853e-03, PNorm = 184.5585, GNorm = 0.2807, lr_0 = 1.2876e-04
Loss = 7.5647e-03, PNorm = 184.5614, GNorm = 0.0978, lr_0 = 1.2867e-04
Loss = 3.9780e-03, PNorm = 184.5638, GNorm = 0.8872, lr_0 = 1.2859e-04
Loss = 6.8525e-03, PNorm = 184.5648, GNorm = 0.0873, lr_0 = 1.2850e-04
Loss = 1.9124e-03, PNorm = 184.5665, GNorm = 0.0799, lr_0 = 1.2841e-04
Loss = 2.5023e-03, PNorm = 184.5697, GNorm = 0.0775, lr_0 = 1.2832e-04
Loss = 2.2096e-03, PNorm = 184.5720, GNorm = 0.0750, lr_0 = 1.2823e-04
Loss = 4.1549e-03, PNorm = 184.5730, GNorm = 0.0759, lr_0 = 1.2815e-04
Loss = 3.7628e-03, PNorm = 184.5759, GNorm = 0.1086, lr_0 = 1.2806e-04
Loss = 1.9450e-03, PNorm = 184.5778, GNorm = 0.0748, lr_0 = 1.2797e-04
Validation mae = 0.120647
Epoch 27
Loss = 6.4295e-03, PNorm = 184.5807, GNorm = 0.1825, lr_0 = 1.2788e-04
Loss = 3.3455e-03, PNorm = 184.5832, GNorm = 0.0740, lr_0 = 1.2780e-04
Loss = 2.7985e-03, PNorm = 184.5849, GNorm = 0.0846, lr_0 = 1.2771e-04
Loss = 4.4783e-03, PNorm = 184.5869, GNorm = 0.0680, lr_0 = 1.2762e-04
Loss = 1.8569e-03, PNorm = 184.5881, GNorm = 0.0636, lr_0 = 1.2753e-04
Loss = 3.0975e-03, PNorm = 184.5889, GNorm = 0.0678, lr_0 = 1.2745e-04
Loss = 6.2675e-03, PNorm = 184.5900, GNorm = 0.0609, lr_0 = 1.2736e-04
Loss = 2.2944e-03, PNorm = 184.5903, GNorm = 0.0342, lr_0 = 1.2727e-04
Loss = 1.7680e-03, PNorm = 184.5910, GNorm = 0.0648, lr_0 = 1.2718e-04
Loss = 3.3663e-03, PNorm = 184.5915, GNorm = 0.0803, lr_0 = 1.2710e-04
Loss = 1.9775e-03, PNorm = 184.5926, GNorm = 0.1556, lr_0 = 1.2701e-04
Loss = 1.2063e-03, PNorm = 184.5955, GNorm = 0.1123, lr_0 = 1.2692e-04
Loss = 2.4113e-03, PNorm = 184.5970, GNorm = 0.0863, lr_0 = 1.2684e-04
Loss = 2.5299e-03, PNorm = 184.5984, GNorm = 0.1211, lr_0 = 1.2675e-04
Loss = 3.8581e-03, PNorm = 184.5988, GNorm = 0.0762, lr_0 = 1.2666e-04
Loss = 4.9664e-03, PNorm = 184.6003, GNorm = 0.0883, lr_0 = 1.2658e-04
Loss = 3.8662e-03, PNorm = 184.6017, GNorm = 0.4295, lr_0 = 1.2649e-04
Loss = 5.1722e-03, PNorm = 184.6046, GNorm = 0.1220, lr_0 = 1.2640e-04
Loss = 2.6497e-03, PNorm = 184.6064, GNorm = 0.0518, lr_0 = 1.2632e-04
Loss = 2.3523e-03, PNorm = 184.6069, GNorm = 0.1147, lr_0 = 1.2623e-04
Loss = 1.5874e-03, PNorm = 184.6084, GNorm = 0.1274, lr_0 = 1.2614e-04
Loss = 2.4754e-03, PNorm = 184.6096, GNorm = 0.0423, lr_0 = 1.2606e-04
Loss = 3.1588e-03, PNorm = 184.6118, GNorm = 0.0670, lr_0 = 1.2597e-04
Loss = 1.1661e-03, PNorm = 184.6129, GNorm = 0.0635, lr_0 = 1.2588e-04
Loss = 7.8494e-03, PNorm = 184.6143, GNorm = 0.0670, lr_0 = 1.2580e-04
Loss = 1.1951e-03, PNorm = 184.6146, GNorm = 0.0999, lr_0 = 1.2571e-04
Loss = 7.1288e-03, PNorm = 184.6162, GNorm = 0.1530, lr_0 = 1.2563e-04
Loss = 1.2249e-03, PNorm = 184.6179, GNorm = 0.0706, lr_0 = 1.2554e-04
Loss = 2.2417e-03, PNorm = 184.6201, GNorm = 0.0830, lr_0 = 1.2545e-04
Loss = 2.8462e-03, PNorm = 184.6219, GNorm = 0.1471, lr_0 = 1.2537e-04
Loss = 2.4464e-03, PNorm = 184.6218, GNorm = 0.0654, lr_0 = 1.2528e-04
Loss = 2.4929e-03, PNorm = 184.6231, GNorm = 0.1304, lr_0 = 1.2520e-04
Loss = 3.0511e-03, PNorm = 184.6252, GNorm = 0.0719, lr_0 = 1.2511e-04
Loss = 1.5602e-02, PNorm = 184.6284, GNorm = 0.3484, lr_0 = 1.2502e-04
Loss = 2.5767e-03, PNorm = 184.6288, GNorm = 0.0901, lr_0 = 1.2494e-04
Loss = 1.4334e-03, PNorm = 184.6291, GNorm = 0.0876, lr_0 = 1.2485e-04
Loss = 1.3089e-03, PNorm = 184.6307, GNorm = 0.0402, lr_0 = 1.2477e-04
Loss = 5.9427e-03, PNorm = 184.6320, GNorm = 0.1069, lr_0 = 1.2468e-04
Loss = 1.6104e-03, PNorm = 184.6327, GNorm = 0.0773, lr_0 = 1.2460e-04
Loss = 2.0472e-03, PNorm = 184.6343, GNorm = 0.1900, lr_0 = 1.2451e-04
Loss = 2.2434e-03, PNorm = 184.6354, GNorm = 0.1364, lr_0 = 1.2443e-04
Loss = 1.4591e-03, PNorm = 184.6366, GNorm = 0.1167, lr_0 = 1.2434e-04
Loss = 3.1488e-03, PNorm = 184.6380, GNorm = 0.1175, lr_0 = 1.2426e-04
Loss = 2.5806e-03, PNorm = 184.6392, GNorm = 0.3016, lr_0 = 1.2417e-04
Loss = 4.0984e-03, PNorm = 184.6411, GNorm = 0.1087, lr_0 = 1.2409e-04
Loss = 1.6642e-03, PNorm = 184.6441, GNorm = 0.1086, lr_0 = 1.2400e-04
Loss = 2.6562e-03, PNorm = 184.6461, GNorm = 0.0651, lr_0 = 1.2392e-04
Loss = 1.8383e-03, PNorm = 184.6474, GNorm = 0.0702, lr_0 = 1.2383e-04
Loss = 1.3937e-03, PNorm = 184.6476, GNorm = 0.0637, lr_0 = 1.2375e-04
Loss = 5.4869e-03, PNorm = 184.6486, GNorm = 0.2767, lr_0 = 1.2366e-04
Loss = 5.5669e-03, PNorm = 184.6497, GNorm = 0.5243, lr_0 = 1.2358e-04
Loss = 1.2720e-03, PNorm = 184.6506, GNorm = 0.0548, lr_0 = 1.2349e-04
Loss = 4.0494e-03, PNorm = 184.6529, GNorm = 0.1349, lr_0 = 1.2341e-04
Loss = 1.5942e-03, PNorm = 184.6547, GNorm = 0.0471, lr_0 = 1.2332e-04
Loss = 7.9478e-03, PNorm = 184.6568, GNorm = 0.1022, lr_0 = 1.2324e-04
Loss = 9.4319e-03, PNorm = 184.6581, GNorm = 2.2866, lr_0 = 1.2315e-04
Loss = 6.8882e-03, PNorm = 184.6587, GNorm = 0.4794, lr_0 = 1.2307e-04
Loss = 1.6484e-03, PNorm = 184.6622, GNorm = 0.1749, lr_0 = 1.2298e-04
Loss = 5.7295e-03, PNorm = 184.6636, GNorm = 0.1331, lr_0 = 1.2290e-04
Loss = 6.0264e-03, PNorm = 184.6659, GNorm = 0.1224, lr_0 = 1.2282e-04
Loss = 1.8745e-03, PNorm = 184.6684, GNorm = 0.0599, lr_0 = 1.2273e-04
Loss = 2.2500e-03, PNorm = 184.6706, GNorm = 0.0573, lr_0 = 1.2265e-04
Loss = 2.8819e-03, PNorm = 184.6732, GNorm = 0.1031, lr_0 = 1.2256e-04
Loss = 1.3444e-03, PNorm = 184.6760, GNorm = 0.1813, lr_0 = 1.2248e-04
Loss = 2.4675e-03, PNorm = 184.6797, GNorm = 0.1518, lr_0 = 1.2240e-04
Loss = 2.1386e-03, PNorm = 184.6822, GNorm = 0.1727, lr_0 = 1.2231e-04
Loss = 1.2234e-03, PNorm = 184.6837, GNorm = 0.1429, lr_0 = 1.2223e-04
Loss = 2.6079e-03, PNorm = 184.6846, GNorm = 0.0981, lr_0 = 1.2214e-04
Loss = 5.7990e-03, PNorm = 184.6863, GNorm = 0.1193, lr_0 = 1.2206e-04
Loss = 3.3490e-03, PNorm = 184.6891, GNorm = 0.0594, lr_0 = 1.2198e-04
Loss = 2.1420e-03, PNorm = 184.6918, GNorm = 0.0591, lr_0 = 1.2189e-04
Loss = 2.5436e-03, PNorm = 184.6944, GNorm = 0.1145, lr_0 = 1.2181e-04
Loss = 1.1741e-03, PNorm = 184.6951, GNorm = 0.1754, lr_0 = 1.2173e-04
Loss = 2.2878e-03, PNorm = 184.6962, GNorm = 0.1902, lr_0 = 1.2164e-04
Loss = 3.9643e-03, PNorm = 184.6972, GNorm = 0.0963, lr_0 = 1.2156e-04
Loss = 2.7668e-03, PNorm = 184.6993, GNorm = 0.1128, lr_0 = 1.2148e-04
Loss = 1.5087e-03, PNorm = 184.7008, GNorm = 0.0426, lr_0 = 1.2139e-04
Loss = 1.3766e-03, PNorm = 184.7032, GNorm = 0.0950, lr_0 = 1.2131e-04
Loss = 1.2731e-03, PNorm = 184.7065, GNorm = 0.0649, lr_0 = 1.2123e-04
Loss = 1.7930e-03, PNorm = 184.7089, GNorm = 0.0554, lr_0 = 1.2114e-04
Loss = 4.4788e-03, PNorm = 184.7111, GNorm = 0.0657, lr_0 = 1.2106e-04
Loss = 2.7291e-03, PNorm = 184.7140, GNorm = 0.1191, lr_0 = 1.2098e-04
Loss = 6.3133e-03, PNorm = 184.7179, GNorm = 0.1589, lr_0 = 1.2090e-04
Loss = 2.8892e-03, PNorm = 184.7197, GNorm = 0.1205, lr_0 = 1.2081e-04
Loss = 6.9318e-03, PNorm = 184.7197, GNorm = 0.0841, lr_0 = 1.2073e-04
Loss = 2.4799e-03, PNorm = 184.7205, GNorm = 0.1457, lr_0 = 1.2065e-04
Loss = 3.0904e-03, PNorm = 184.7233, GNorm = 0.1872, lr_0 = 1.2056e-04
Loss = 2.1949e-03, PNorm = 184.7260, GNorm = 0.1326, lr_0 = 1.2048e-04
Loss = 2.7315e-03, PNorm = 184.7278, GNorm = 0.0753, lr_0 = 1.2040e-04
Loss = 4.6645e-03, PNorm = 184.7292, GNorm = 0.1673, lr_0 = 1.2032e-04
Loss = 7.0089e-03, PNorm = 184.7310, GNorm = 0.0745, lr_0 = 1.2023e-04
Loss = 2.4576e-03, PNorm = 184.7332, GNorm = 0.0917, lr_0 = 1.2015e-04
Loss = 1.5894e-03, PNorm = 184.7353, GNorm = 0.0711, lr_0 = 1.2007e-04
Loss = 8.2932e-03, PNorm = 184.7381, GNorm = 0.1116, lr_0 = 1.1999e-04
Loss = 4.1735e-03, PNorm = 184.7389, GNorm = 0.1221, lr_0 = 1.1991e-04
Loss = 2.9032e-03, PNorm = 184.7418, GNorm = 0.1228, lr_0 = 1.1982e-04
Loss = 3.6805e-03, PNorm = 184.7434, GNorm = 0.0630, lr_0 = 1.1974e-04
Loss = 2.6549e-03, PNorm = 184.7441, GNorm = 0.0961, lr_0 = 1.1966e-04
Loss = 4.1803e-03, PNorm = 184.7463, GNorm = 0.0886, lr_0 = 1.1958e-04
Loss = 2.5992e-03, PNorm = 184.7479, GNorm = 0.0947, lr_0 = 1.1950e-04
Loss = 1.3479e-03, PNorm = 184.7491, GNorm = 0.0550, lr_0 = 1.1941e-04
Loss = 1.0522e-03, PNorm = 184.7516, GNorm = 0.0611, lr_0 = 1.1933e-04
Loss = 3.8742e-03, PNorm = 184.7535, GNorm = 0.3419, lr_0 = 1.1925e-04
Loss = 1.6312e-03, PNorm = 184.7552, GNorm = 0.0796, lr_0 = 1.1917e-04
Loss = 1.8082e-03, PNorm = 184.7573, GNorm = 0.0868, lr_0 = 1.1909e-04
Loss = 5.0577e-03, PNorm = 184.7596, GNorm = 0.1152, lr_0 = 1.1901e-04
Loss = 1.2668e-03, PNorm = 184.7615, GNorm = 0.0962, lr_0 = 1.1892e-04
Loss = 2.8381e-03, PNorm = 184.7631, GNorm = 0.0627, lr_0 = 1.1884e-04
Loss = 4.5658e-03, PNorm = 184.7644, GNorm = 0.0722, lr_0 = 1.1876e-04
Loss = 4.1135e-03, PNorm = 184.7661, GNorm = 0.1082, lr_0 = 1.1868e-04
Loss = 2.8783e-03, PNorm = 184.7673, GNorm = 0.0640, lr_0 = 1.1860e-04
Loss = 3.0997e-03, PNorm = 184.7689, GNorm = 0.1096, lr_0 = 1.1852e-04
Loss = 1.4274e-03, PNorm = 184.7714, GNorm = 0.1014, lr_0 = 1.1844e-04
Loss = 1.5782e-03, PNorm = 184.7736, GNorm = 0.0955, lr_0 = 1.1835e-04
Loss = 4.0162e-03, PNorm = 184.7759, GNorm = 0.2880, lr_0 = 1.1827e-04
Loss = 3.3220e-03, PNorm = 184.7781, GNorm = 0.0324, lr_0 = 1.1819e-04
Loss = 3.4928e-03, PNorm = 184.7804, GNorm = 0.0679, lr_0 = 1.1811e-04
Loss = 2.4947e-03, PNorm = 184.7814, GNorm = 0.1340, lr_0 = 1.1803e-04
Loss = 3.0165e-03, PNorm = 184.7842, GNorm = 0.0531, lr_0 = 1.1795e-04
Loss = 1.9133e-03, PNorm = 184.7856, GNorm = 0.1855, lr_0 = 1.1787e-04
Validation mae = 0.120670
Epoch 28
Loss = 2.1210e-03, PNorm = 184.7866, GNorm = 0.0716, lr_0 = 1.1779e-04
Loss = 1.2450e-03, PNorm = 184.7871, GNorm = 0.0458, lr_0 = 1.1771e-04
Loss = 1.3430e-03, PNorm = 184.7877, GNorm = 0.0941, lr_0 = 1.1763e-04
Loss = 2.1869e-03, PNorm = 184.7881, GNorm = 0.0893, lr_0 = 1.1755e-04
Loss = 3.0614e-03, PNorm = 184.7881, GNorm = 0.1273, lr_0 = 1.1747e-04
Loss = 2.5823e-03, PNorm = 184.7875, GNorm = 0.0832, lr_0 = 1.1739e-04
Loss = 1.3927e-03, PNorm = 184.7879, GNorm = 0.1071, lr_0 = 1.1730e-04
Loss = 1.1079e-03, PNorm = 184.7893, GNorm = 0.1074, lr_0 = 1.1722e-04
Loss = 1.3399e-03, PNorm = 184.7911, GNorm = 0.1361, lr_0 = 1.1714e-04
Loss = 2.3619e-03, PNorm = 184.7924, GNorm = 0.0811, lr_0 = 1.1706e-04
Loss = 1.0244e-03, PNorm = 184.7940, GNorm = 0.0519, lr_0 = 1.1698e-04
Loss = 1.9966e-03, PNorm = 184.7948, GNorm = 0.2384, lr_0 = 1.1690e-04
Loss = 2.8458e-03, PNorm = 184.7945, GNorm = 0.0638, lr_0 = 1.1682e-04
Loss = 1.1572e-02, PNorm = 184.7972, GNorm = 0.2267, lr_0 = 1.1674e-04
Loss = 1.8656e-03, PNorm = 184.7990, GNorm = 0.1695, lr_0 = 1.1666e-04
Loss = 1.8565e-03, PNorm = 184.7993, GNorm = 0.0524, lr_0 = 1.1658e-04
Loss = 7.1007e-03, PNorm = 184.8012, GNorm = 0.1020, lr_0 = 1.1650e-04
Loss = 1.7124e-03, PNorm = 184.8033, GNorm = 0.1936, lr_0 = 1.1642e-04
Loss = 3.7674e-03, PNorm = 184.8040, GNorm = 0.0433, lr_0 = 1.1634e-04
Loss = 2.8149e-03, PNorm = 184.8034, GNorm = 0.2407, lr_0 = 1.1626e-04
Loss = 1.2471e-03, PNorm = 184.8036, GNorm = 0.0707, lr_0 = 1.1618e-04
Loss = 4.5840e-03, PNorm = 184.8040, GNorm = 0.0694, lr_0 = 1.1611e-04
Loss = 1.1579e-03, PNorm = 184.8045, GNorm = 0.0845, lr_0 = 1.1603e-04
Loss = 3.4200e-03, PNorm = 184.8064, GNorm = 0.0523, lr_0 = 1.1595e-04
Loss = 1.7225e-03, PNorm = 184.8080, GNorm = 0.1167, lr_0 = 1.1587e-04
Loss = 3.2798e-03, PNorm = 184.8078, GNorm = 0.1861, lr_0 = 1.1579e-04
Loss = 1.2554e-03, PNorm = 184.8088, GNorm = 0.1060, lr_0 = 1.1571e-04
Loss = 1.7597e-03, PNorm = 184.8100, GNorm = 0.0369, lr_0 = 1.1563e-04
Loss = 1.1031e-03, PNorm = 184.8116, GNorm = 0.0653, lr_0 = 1.1555e-04
Loss = 6.1853e-03, PNorm = 184.8124, GNorm = 0.0658, lr_0 = 1.1547e-04
Loss = 1.9385e-03, PNorm = 184.8130, GNorm = 0.0946, lr_0 = 1.1539e-04
Loss = 1.6895e-03, PNorm = 184.8141, GNorm = 0.0659, lr_0 = 1.1531e-04
Loss = 1.8942e-03, PNorm = 184.8154, GNorm = 0.0683, lr_0 = 1.1523e-04
Loss = 3.0510e-03, PNorm = 184.8155, GNorm = 0.0645, lr_0 = 1.1515e-04
Loss = 1.1731e-03, PNorm = 184.8166, GNorm = 0.0705, lr_0 = 1.1508e-04
Loss = 3.4699e-03, PNorm = 184.8179, GNorm = 0.0711, lr_0 = 1.1500e-04
Loss = 4.0712e-03, PNorm = 184.8187, GNorm = 0.0266, lr_0 = 1.1492e-04
Loss = 1.8016e-03, PNorm = 184.8193, GNorm = 0.1286, lr_0 = 1.1484e-04
Loss = 1.1974e-03, PNorm = 184.8189, GNorm = 0.1416, lr_0 = 1.1476e-04
Loss = 2.3834e-03, PNorm = 184.8196, GNorm = 0.1153, lr_0 = 1.1468e-04
Loss = 4.9589e-03, PNorm = 184.8199, GNorm = 0.0491, lr_0 = 1.1460e-04
Loss = 3.2724e-03, PNorm = 184.8210, GNorm = 0.0474, lr_0 = 1.1452e-04
Loss = 9.6828e-04, PNorm = 184.8229, GNorm = 0.1485, lr_0 = 1.1445e-04
Loss = 7.9549e-03, PNorm = 184.8243, GNorm = 0.0709, lr_0 = 1.1437e-04
Loss = 3.9816e-03, PNorm = 184.8265, GNorm = 0.1153, lr_0 = 1.1429e-04
Loss = 1.5816e-03, PNorm = 184.8285, GNorm = 0.1031, lr_0 = 1.1421e-04
Loss = 3.5478e-03, PNorm = 184.8302, GNorm = 0.0891, lr_0 = 1.1413e-04
Loss = 2.0046e-03, PNorm = 184.8315, GNorm = 0.0573, lr_0 = 1.1405e-04
Loss = 1.6694e-03, PNorm = 184.8324, GNorm = 0.0685, lr_0 = 1.1398e-04
Loss = 2.3421e-03, PNorm = 184.8339, GNorm = 0.1345, lr_0 = 1.1390e-04
Loss = 3.5831e-03, PNorm = 184.8355, GNorm = 0.1655, lr_0 = 1.1382e-04
Loss = 5.2160e-03, PNorm = 184.8369, GNorm = 0.0510, lr_0 = 1.1374e-04
Loss = 4.5787e-03, PNorm = 184.8370, GNorm = 0.1080, lr_0 = 1.1366e-04
Loss = 1.0871e-03, PNorm = 184.8370, GNorm = 0.1004, lr_0 = 1.1359e-04
Loss = 3.6526e-03, PNorm = 184.8392, GNorm = 0.0424, lr_0 = 1.1351e-04
Loss = 4.3393e-03, PNorm = 184.8405, GNorm = 0.1045, lr_0 = 1.1343e-04
Loss = 1.2392e-03, PNorm = 184.8428, GNorm = 0.1342, lr_0 = 1.1335e-04
Loss = 2.3115e-03, PNorm = 184.8461, GNorm = 0.0440, lr_0 = 1.1328e-04
Loss = 2.7576e-03, PNorm = 184.8490, GNorm = 0.1250, lr_0 = 1.1320e-04
Loss = 1.3518e-03, PNorm = 184.8495, GNorm = 0.0589, lr_0 = 1.1312e-04
Loss = 1.6875e-03, PNorm = 184.8497, GNorm = 0.0706, lr_0 = 1.1304e-04
Loss = 1.9541e-03, PNorm = 184.8506, GNorm = 0.1874, lr_0 = 1.1297e-04
Loss = 3.2685e-03, PNorm = 184.8535, GNorm = 0.2217, lr_0 = 1.1289e-04
Loss = 2.4033e-03, PNorm = 184.8550, GNorm = 0.2685, lr_0 = 1.1281e-04
Loss = 6.1226e-03, PNorm = 184.8557, GNorm = 0.0706, lr_0 = 1.1273e-04
Loss = 3.5993e-03, PNorm = 184.8554, GNorm = 0.0431, lr_0 = 1.1266e-04
Loss = 3.9589e-03, PNorm = 184.8569, GNorm = 0.0941, lr_0 = 1.1258e-04
Loss = 6.8975e-03, PNorm = 184.8579, GNorm = 0.1339, lr_0 = 1.1250e-04
Loss = 1.4845e-03, PNorm = 184.8600, GNorm = 0.1518, lr_0 = 1.1243e-04
Loss = 3.8959e-03, PNorm = 184.8615, GNorm = 0.1496, lr_0 = 1.1235e-04
Loss = 2.6883e-03, PNorm = 184.8620, GNorm = 0.0629, lr_0 = 1.1227e-04
Loss = 2.4538e-03, PNorm = 184.8637, GNorm = 0.3889, lr_0 = 1.1219e-04
Loss = 1.5694e-03, PNorm = 184.8659, GNorm = 0.1076, lr_0 = 1.1212e-04
Loss = 8.0840e-03, PNorm = 184.8683, GNorm = 0.1409, lr_0 = 1.1204e-04
Loss = 3.8170e-03, PNorm = 184.8689, GNorm = 0.0581, lr_0 = 1.1196e-04
Loss = 1.0379e-03, PNorm = 184.8699, GNorm = 0.0719, lr_0 = 1.1189e-04
Loss = 2.5676e-03, PNorm = 184.8710, GNorm = 0.1260, lr_0 = 1.1181e-04
Loss = 3.5131e-03, PNorm = 184.8725, GNorm = 0.2897, lr_0 = 1.1173e-04
Loss = 6.1777e-03, PNorm = 184.8734, GNorm = 0.7183, lr_0 = 1.1166e-04
Loss = 4.9760e-03, PNorm = 184.8752, GNorm = 0.1636, lr_0 = 1.1158e-04
Loss = 1.1383e-03, PNorm = 184.8752, GNorm = 0.0856, lr_0 = 1.1150e-04
Loss = 2.8598e-03, PNorm = 184.8756, GNorm = 0.0683, lr_0 = 1.1143e-04
Loss = 2.9720e-03, PNorm = 184.8759, GNorm = 0.1386, lr_0 = 1.1135e-04
Loss = 4.1752e-03, PNorm = 184.8773, GNorm = 0.7846, lr_0 = 1.1128e-04
Loss = 2.6396e-03, PNorm = 184.8789, GNorm = 0.0735, lr_0 = 1.1120e-04
Loss = 3.5685e-03, PNorm = 184.8807, GNorm = 0.0827, lr_0 = 1.1112e-04
Loss = 6.7761e-03, PNorm = 184.8827, GNorm = 0.0501, lr_0 = 1.1105e-04
Loss = 1.8232e-03, PNorm = 184.8848, GNorm = 0.0597, lr_0 = 1.1097e-04
Loss = 3.0405e-03, PNorm = 184.8866, GNorm = 0.0653, lr_0 = 1.1089e-04
Loss = 1.5586e-03, PNorm = 184.8880, GNorm = 0.0639, lr_0 = 1.1082e-04
Loss = 3.7296e-03, PNorm = 184.8888, GNorm = 0.1124, lr_0 = 1.1074e-04
Loss = 5.3907e-03, PNorm = 184.8910, GNorm = 0.6344, lr_0 = 1.1067e-04
Loss = 2.2917e-03, PNorm = 184.8954, GNorm = 0.0784, lr_0 = 1.1059e-04
Loss = 9.1003e-03, PNorm = 184.8970, GNorm = 0.3546, lr_0 = 1.1052e-04
Loss = 2.9166e-03, PNorm = 184.8979, GNorm = 0.4008, lr_0 = 1.1044e-04
Loss = 2.7334e-03, PNorm = 184.9004, GNorm = 0.0870, lr_0 = 1.1036e-04
Loss = 1.3748e-03, PNorm = 184.9026, GNorm = 0.0854, lr_0 = 1.1029e-04
Loss = 1.6038e-03, PNorm = 184.9032, GNorm = 0.0749, lr_0 = 1.1021e-04
Loss = 3.4503e-03, PNorm = 184.9041, GNorm = 0.1674, lr_0 = 1.1014e-04
Loss = 4.8183e-03, PNorm = 184.9059, GNorm = 0.2111, lr_0 = 1.1006e-04
Loss = 3.0524e-03, PNorm = 184.9077, GNorm = 0.1260, lr_0 = 1.0999e-04
Loss = 4.1360e-03, PNorm = 184.9111, GNorm = 0.0825, lr_0 = 1.0991e-04
Loss = 1.4646e-03, PNorm = 184.9126, GNorm = 0.1656, lr_0 = 1.0984e-04
Loss = 2.0257e-03, PNorm = 184.9138, GNorm = 0.4071, lr_0 = 1.0976e-04
Loss = 3.1715e-03, PNorm = 184.9158, GNorm = 0.5699, lr_0 = 1.0969e-04
Loss = 5.3581e-03, PNorm = 184.9165, GNorm = 0.0445, lr_0 = 1.0961e-04
Loss = 3.1816e-03, PNorm = 184.9177, GNorm = 0.0872, lr_0 = 1.0954e-04
Loss = 8.3015e-03, PNorm = 184.9188, GNorm = 0.0814, lr_0 = 1.0946e-04
Loss = 1.0315e-03, PNorm = 184.9194, GNorm = 0.1027, lr_0 = 1.0939e-04
Loss = 1.3653e-03, PNorm = 184.9190, GNorm = 0.1028, lr_0 = 1.0931e-04
Loss = 1.2776e-03, PNorm = 184.9179, GNorm = 0.0657, lr_0 = 1.0924e-04
Loss = 5.1371e-03, PNorm = 184.9199, GNorm = 0.0813, lr_0 = 1.0916e-04
Loss = 1.8849e-03, PNorm = 184.9220, GNorm = 0.1491, lr_0 = 1.0909e-04
Loss = 3.1342e-03, PNorm = 184.9229, GNorm = 0.0660, lr_0 = 1.0901e-04
Loss = 5.1746e-03, PNorm = 184.9244, GNorm = 0.2029, lr_0 = 1.0894e-04
Loss = 1.0931e-03, PNorm = 184.9267, GNorm = 0.0518, lr_0 = 1.0886e-04
Loss = 1.6468e-03, PNorm = 184.9277, GNorm = 0.0985, lr_0 = 1.0879e-04
Loss = 3.7686e-03, PNorm = 184.9291, GNorm = 0.0444, lr_0 = 1.0871e-04
Loss = 1.4955e-03, PNorm = 184.9312, GNorm = 0.1034, lr_0 = 1.0864e-04
Loss = 5.6593e-03, PNorm = 184.9323, GNorm = 0.1109, lr_0 = 1.0856e-04
Validation mae = 0.120610
Epoch 29
Loss = 6.2279e-03, PNorm = 184.9331, GNorm = 0.0820, lr_0 = 1.0849e-04
Loss = 1.4859e-03, PNorm = 184.9334, GNorm = 0.0598, lr_0 = 1.0841e-04
Loss = 1.5703e-03, PNorm = 184.9330, GNorm = 0.1156, lr_0 = 1.0834e-04
Loss = 1.4564e-03, PNorm = 184.9338, GNorm = 0.1257, lr_0 = 1.0827e-04
Loss = 2.7374e-03, PNorm = 184.9353, GNorm = 0.0387, lr_0 = 1.0819e-04
Loss = 1.3228e-03, PNorm = 184.9373, GNorm = 0.0688, lr_0 = 1.0812e-04
Loss = 1.1081e-03, PNorm = 184.9391, GNorm = 0.0459, lr_0 = 1.0804e-04
Loss = 4.1515e-03, PNorm = 184.9411, GNorm = 0.0788, lr_0 = 1.0797e-04
Loss = 4.5657e-03, PNorm = 184.9424, GNorm = 0.0895, lr_0 = 1.0790e-04
Loss = 1.0169e-03, PNorm = 184.9430, GNorm = 0.0447, lr_0 = 1.0782e-04
Loss = 2.8919e-03, PNorm = 184.9439, GNorm = 0.0554, lr_0 = 1.0775e-04
Loss = 1.7772e-03, PNorm = 184.9459, GNorm = 0.0811, lr_0 = 1.0767e-04
Loss = 8.4124e-04, PNorm = 184.9481, GNorm = 0.0890, lr_0 = 1.0760e-04
Loss = 3.3779e-03, PNorm = 184.9492, GNorm = 0.1967, lr_0 = 1.0753e-04
Loss = 3.2835e-03, PNorm = 184.9503, GNorm = 0.0824, lr_0 = 1.0745e-04
Loss = 2.2308e-03, PNorm = 184.9516, GNorm = 0.1465, lr_0 = 1.0738e-04
Loss = 1.1712e-03, PNorm = 184.9531, GNorm = 0.1923, lr_0 = 1.0731e-04
Loss = 1.3916e-03, PNorm = 184.9533, GNorm = 0.0929, lr_0 = 1.0723e-04
Loss = 2.2794e-03, PNorm = 184.9546, GNorm = 0.0565, lr_0 = 1.0716e-04
Loss = 2.4675e-03, PNorm = 184.9557, GNorm = 0.0324, lr_0 = 1.0709e-04
Loss = 3.2462e-03, PNorm = 184.9560, GNorm = 0.1118, lr_0 = 1.0701e-04
Loss = 3.1708e-03, PNorm = 184.9576, GNorm = 0.0586, lr_0 = 1.0694e-04
Loss = 1.6506e-03, PNorm = 184.9601, GNorm = 0.1664, lr_0 = 1.0687e-04
Loss = 3.3578e-03, PNorm = 184.9616, GNorm = 0.1661, lr_0 = 1.0679e-04
Loss = 1.8054e-03, PNorm = 184.9630, GNorm = 0.1106, lr_0 = 1.0672e-04
Loss = 3.0815e-03, PNorm = 184.9635, GNorm = 0.2933, lr_0 = 1.0665e-04
Loss = 1.1141e-03, PNorm = 184.9639, GNorm = 0.0764, lr_0 = 1.0657e-04
Loss = 4.4262e-03, PNorm = 184.9642, GNorm = 0.5026, lr_0 = 1.0650e-04
Loss = 1.8064e-03, PNorm = 184.9657, GNorm = 0.0582, lr_0 = 1.0643e-04
Loss = 1.1526e-03, PNorm = 184.9672, GNorm = 0.0737, lr_0 = 1.0635e-04
Loss = 9.3702e-04, PNorm = 184.9683, GNorm = 0.0568, lr_0 = 1.0628e-04
Loss = 9.2742e-04, PNorm = 184.9691, GNorm = 0.0382, lr_0 = 1.0621e-04
Loss = 1.0181e-03, PNorm = 184.9700, GNorm = 0.0497, lr_0 = 1.0614e-04
Loss = 1.0693e-02, PNorm = 184.9716, GNorm = 0.0819, lr_0 = 1.0606e-04
Loss = 1.8678e-03, PNorm = 184.9727, GNorm = 0.1573, lr_0 = 1.0599e-04
Loss = 2.8865e-03, PNorm = 184.9734, GNorm = 0.0967, lr_0 = 1.0592e-04
Loss = 1.3064e-03, PNorm = 184.9732, GNorm = 0.0460, lr_0 = 1.0585e-04
Loss = 3.0807e-03, PNorm = 184.9743, GNorm = 0.2393, lr_0 = 1.0577e-04
Loss = 4.2977e-03, PNorm = 184.9758, GNorm = 0.0560, lr_0 = 1.0570e-04
Loss = 1.3697e-03, PNorm = 184.9772, GNorm = 0.1258, lr_0 = 1.0563e-04
Loss = 3.9811e-03, PNorm = 184.9793, GNorm = 0.1003, lr_0 = 1.0556e-04
Loss = 1.0328e-03, PNorm = 184.9795, GNorm = 0.0511, lr_0 = 1.0548e-04
Loss = 2.7734e-03, PNorm = 184.9800, GNorm = 0.1340, lr_0 = 1.0541e-04
Loss = 8.4813e-04, PNorm = 184.9816, GNorm = 0.0501, lr_0 = 1.0534e-04
Loss = 2.4476e-03, PNorm = 184.9830, GNorm = 0.1114, lr_0 = 1.0527e-04
Loss = 9.1868e-04, PNorm = 184.9841, GNorm = 0.0872, lr_0 = 1.0519e-04
Loss = 3.7024e-03, PNorm = 184.9860, GNorm = 0.1044, lr_0 = 1.0512e-04
Loss = 1.1185e-03, PNorm = 184.9876, GNorm = 0.0662, lr_0 = 1.0505e-04
Loss = 1.4771e-03, PNorm = 184.9883, GNorm = 0.0901, lr_0 = 1.0498e-04
Loss = 5.0723e-03, PNorm = 184.9893, GNorm = 0.0915, lr_0 = 1.0491e-04
Loss = 5.5091e-03, PNorm = 184.9902, GNorm = 0.0456, lr_0 = 1.0483e-04
Loss = 1.4468e-03, PNorm = 184.9910, GNorm = 0.0599, lr_0 = 1.0476e-04
Loss = 1.9581e-03, PNorm = 184.9919, GNorm = 0.0839, lr_0 = 1.0469e-04
Loss = 2.4736e-03, PNorm = 184.9925, GNorm = 0.0429, lr_0 = 1.0462e-04
Loss = 1.3139e-03, PNorm = 184.9923, GNorm = 0.0842, lr_0 = 1.0455e-04
Loss = 1.9780e-03, PNorm = 184.9920, GNorm = 0.0659, lr_0 = 1.0448e-04
Loss = 5.0993e-03, PNorm = 184.9930, GNorm = 0.1155, lr_0 = 1.0440e-04
Loss = 7.5828e-03, PNorm = 184.9942, GNorm = 0.2212, lr_0 = 1.0433e-04
Loss = 5.4010e-03, PNorm = 184.9935, GNorm = 0.0470, lr_0 = 1.0426e-04
Loss = 2.5419e-03, PNorm = 184.9956, GNorm = 0.0746, lr_0 = 1.0419e-04
Loss = 3.5138e-03, PNorm = 184.9972, GNorm = 0.2329, lr_0 = 1.0412e-04
Loss = 3.3349e-03, PNorm = 184.9995, GNorm = 0.1208, lr_0 = 1.0405e-04
Loss = 4.2712e-03, PNorm = 185.0008, GNorm = 0.0943, lr_0 = 1.0398e-04
Loss = 3.5731e-03, PNorm = 185.0034, GNorm = 0.1757, lr_0 = 1.0391e-04
Loss = 2.0377e-03, PNorm = 185.0043, GNorm = 0.1325, lr_0 = 1.0383e-04
Loss = 5.0966e-03, PNorm = 185.0050, GNorm = 0.4913, lr_0 = 1.0376e-04
Loss = 2.2735e-03, PNorm = 185.0069, GNorm = 0.1433, lr_0 = 1.0369e-04
Loss = 3.4796e-03, PNorm = 185.0077, GNorm = 0.3267, lr_0 = 1.0362e-04
Loss = 3.0524e-03, PNorm = 185.0087, GNorm = 1.2634, lr_0 = 1.0355e-04
Loss = 2.7741e-03, PNorm = 185.0107, GNorm = 0.0721, lr_0 = 1.0348e-04
Loss = 1.5980e-03, PNorm = 185.0113, GNorm = 0.1578, lr_0 = 1.0341e-04
Loss = 5.9865e-03, PNorm = 185.0116, GNorm = 0.2386, lr_0 = 1.0334e-04
Loss = 8.7232e-03, PNorm = 185.0118, GNorm = 0.1378, lr_0 = 1.0327e-04
Loss = 1.7993e-03, PNorm = 185.0155, GNorm = 0.0885, lr_0 = 1.0320e-04
Loss = 5.0456e-03, PNorm = 185.0180, GNorm = 0.0528, lr_0 = 1.0312e-04
Loss = 2.8188e-03, PNorm = 185.0193, GNorm = 0.1185, lr_0 = 1.0305e-04
Loss = 9.8711e-04, PNorm = 185.0212, GNorm = 0.0795, lr_0 = 1.0298e-04
Loss = 1.3424e-03, PNorm = 185.0228, GNorm = 0.0599, lr_0 = 1.0291e-04
Loss = 1.0031e-03, PNorm = 185.0228, GNorm = 0.0698, lr_0 = 1.0284e-04
Loss = 2.0367e-03, PNorm = 185.0226, GNorm = 0.0514, lr_0 = 1.0277e-04
Loss = 2.9706e-03, PNorm = 185.0231, GNorm = 0.0996, lr_0 = 1.0270e-04
Loss = 2.9238e-03, PNorm = 185.0247, GNorm = 0.1978, lr_0 = 1.0263e-04
Loss = 2.0423e-03, PNorm = 185.0262, GNorm = 0.0944, lr_0 = 1.0256e-04
Loss = 1.0311e-03, PNorm = 185.0270, GNorm = 0.0679, lr_0 = 1.0249e-04
Loss = 1.7260e-03, PNorm = 185.0282, GNorm = 0.0632, lr_0 = 1.0242e-04
Loss = 3.7820e-03, PNorm = 185.0304, GNorm = 0.1519, lr_0 = 1.0235e-04
Loss = 7.6177e-04, PNorm = 185.0324, GNorm = 0.0695, lr_0 = 1.0228e-04
Loss = 2.3912e-03, PNorm = 185.0333, GNorm = 0.0774, lr_0 = 1.0221e-04
Loss = 4.1232e-03, PNorm = 185.0336, GNorm = 0.1453, lr_0 = 1.0214e-04
Loss = 3.2757e-03, PNorm = 185.0344, GNorm = 0.0654, lr_0 = 1.0207e-04
Loss = 3.6001e-03, PNorm = 185.0356, GNorm = 0.0836, lr_0 = 1.0200e-04
Loss = 3.7670e-03, PNorm = 185.0359, GNorm = 0.0816, lr_0 = 1.0193e-04
Loss = 3.8464e-03, PNorm = 185.0356, GNorm = 0.1396, lr_0 = 1.0186e-04
Loss = 1.4101e-03, PNorm = 185.0364, GNorm = 0.0448, lr_0 = 1.0179e-04
Loss = 2.8870e-03, PNorm = 185.0381, GNorm = 0.0653, lr_0 = 1.0172e-04
Loss = 2.3299e-03, PNorm = 185.0380, GNorm = 0.0451, lr_0 = 1.0165e-04
Loss = 3.8338e-03, PNorm = 185.0383, GNorm = 0.0761, lr_0 = 1.0158e-04
Loss = 1.6749e-03, PNorm = 185.0389, GNorm = 0.2698, lr_0 = 1.0151e-04
Loss = 1.1821e-02, PNorm = 185.0398, GNorm = 0.0944, lr_0 = 1.0144e-04
Loss = 4.6756e-03, PNorm = 185.0420, GNorm = 0.1378, lr_0 = 1.0137e-04
Loss = 1.0941e-03, PNorm = 185.0436, GNorm = 0.0419, lr_0 = 1.0130e-04
Loss = 3.1188e-03, PNorm = 185.0444, GNorm = 0.0625, lr_0 = 1.0123e-04
Loss = 3.0566e-03, PNorm = 185.0458, GNorm = 0.2724, lr_0 = 1.0116e-04
Loss = 4.0015e-03, PNorm = 185.0462, GNorm = 0.1263, lr_0 = 1.0110e-04
Loss = 4.2567e-03, PNorm = 185.0485, GNorm = 0.0385, lr_0 = 1.0103e-04
Loss = 7.2334e-03, PNorm = 185.0501, GNorm = 0.3981, lr_0 = 1.0096e-04
Loss = 1.7379e-03, PNorm = 185.0512, GNorm = 0.1293, lr_0 = 1.0089e-04
Loss = 3.7992e-03, PNorm = 185.0527, GNorm = 0.0681, lr_0 = 1.0082e-04
Loss = 1.0484e-03, PNorm = 185.0541, GNorm = 0.1358, lr_0 = 1.0075e-04
Loss = 3.5622e-03, PNorm = 185.0562, GNorm = 0.5813, lr_0 = 1.0068e-04
Loss = 8.0279e-04, PNorm = 185.0580, GNorm = 0.0406, lr_0 = 1.0061e-04
Loss = 4.6100e-03, PNorm = 185.0587, GNorm = 0.0458, lr_0 = 1.0054e-04
Loss = 1.0830e-03, PNorm = 185.0607, GNorm = 0.1124, lr_0 = 1.0047e-04
Loss = 3.1176e-03, PNorm = 185.0621, GNorm = 0.0918, lr_0 = 1.0041e-04
Loss = 9.3303e-04, PNorm = 185.0626, GNorm = 0.1716, lr_0 = 1.0034e-04
Loss = 1.6867e-03, PNorm = 185.0636, GNorm = 0.0990, lr_0 = 1.0027e-04
Loss = 1.0200e-03, PNorm = 185.0653, GNorm = 0.1034, lr_0 = 1.0020e-04
Loss = 4.5793e-03, PNorm = 185.0668, GNorm = 0.1446, lr_0 = 1.0013e-04
Loss = 1.4256e-03, PNorm = 185.0674, GNorm = 0.0910, lr_0 = 1.0006e-04
Loss = 4.2666e-03, PNorm = 185.0696, GNorm = 0.2210, lr_0 = 1.0000e-04
Validation mae = 0.120577
Model 0 best validation mae = 0.120577 on epoch 29
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.119483
Ensemble test mae = 0.119483
Fold 7
Splitting data with seed 7
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN()
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=2048, out_features=2100, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=2100, out_features=2100, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.0, inplace=False)
    (7): Linear(in_features=2100, out_features=1, bias=True)
  )
)
Number of parameters = 8,717,101
Moving model to cuda
Epoch 0
Loss = 9.3765e-01, PNorm = 64.6284, GNorm = 2.3943, lr_0 = 1.0413e-04
Loss = 7.9781e-01, PNorm = 64.6395, GNorm = 1.4963, lr_0 = 1.0788e-04
Loss = 9.4372e-01, PNorm = 64.6517, GNorm = 2.9893, lr_0 = 1.1163e-04
Loss = 7.4000e-01, PNorm = 64.6613, GNorm = 1.9956, lr_0 = 1.1537e-04
Loss = 7.0587e-01, PNorm = 64.6711, GNorm = 3.7006, lr_0 = 1.1913e-04
Loss = 7.3196e-01, PNorm = 64.6802, GNorm = 2.8451, lr_0 = 1.2287e-04
Loss = 7.4221e-01, PNorm = 64.6890, GNorm = 2.2615, lr_0 = 1.2663e-04
Loss = 6.5495e-01, PNorm = 64.6972, GNorm = 3.4022, lr_0 = 1.3038e-04
Loss = 6.1039e-01, PNorm = 64.7066, GNorm = 3.3702, lr_0 = 1.3413e-04
Loss = 6.5166e-01, PNorm = 64.7168, GNorm = 3.7786, lr_0 = 1.3788e-04
Loss = 6.4408e-01, PNorm = 64.7288, GNorm = 2.8115, lr_0 = 1.4163e-04
Loss = 6.2794e-01, PNorm = 64.7412, GNorm = 2.1693, lr_0 = 1.4537e-04
Loss = 6.2860e-01, PNorm = 64.7519, GNorm = 2.3868, lr_0 = 1.4913e-04
Loss = 6.1712e-01, PNorm = 64.7624, GNorm = 2.9217, lr_0 = 1.5288e-04
Loss = 6.6084e-01, PNorm = 64.7731, GNorm = 3.5137, lr_0 = 1.5662e-04
Loss = 5.9268e-01, PNorm = 64.7834, GNorm = 4.0168, lr_0 = 1.6038e-04
Loss = 4.9738e-01, PNorm = 64.7961, GNorm = 2.4763, lr_0 = 1.6412e-04
Loss = 5.5447e-01, PNorm = 64.8085, GNorm = 2.4249, lr_0 = 1.6788e-04
Loss = 6.4302e-01, PNorm = 64.8196, GNorm = 1.7492, lr_0 = 1.7163e-04
Loss = 5.6118e-01, PNorm = 64.8321, GNorm = 1.9966, lr_0 = 1.7538e-04
Loss = 5.7059e-01, PNorm = 64.8414, GNorm = 1.8123, lr_0 = 1.7913e-04
Loss = 5.6990e-01, PNorm = 64.8534, GNorm = 1.8119, lr_0 = 1.8288e-04
Loss = 6.6220e-01, PNorm = 64.8700, GNorm = 3.1274, lr_0 = 1.8662e-04
Loss = 6.1124e-01, PNorm = 64.8858, GNorm = 2.7748, lr_0 = 1.9038e-04
Loss = 6.2412e-01, PNorm = 64.9019, GNorm = 2.6283, lr_0 = 1.9413e-04
Loss = 5.2158e-01, PNorm = 64.9181, GNorm = 1.9792, lr_0 = 1.9788e-04
Loss = 6.2645e-01, PNorm = 64.9345, GNorm = 1.9075, lr_0 = 2.0163e-04
Loss = 6.0798e-01, PNorm = 64.9530, GNorm = 2.7131, lr_0 = 2.0537e-04
Loss = 6.0489e-01, PNorm = 64.9702, GNorm = 3.4338, lr_0 = 2.0913e-04
Loss = 5.3255e-01, PNorm = 64.9869, GNorm = 1.7261, lr_0 = 2.1288e-04
Loss = 5.6095e-01, PNorm = 65.0041, GNorm = 2.0199, lr_0 = 2.1663e-04
Loss = 6.5150e-01, PNorm = 65.0261, GNorm = 2.2636, lr_0 = 2.2038e-04
Loss = 7.5057e-01, PNorm = 65.0466, GNorm = 1.5678, lr_0 = 2.2412e-04
Loss = 5.0739e-01, PNorm = 65.0648, GNorm = 1.3697, lr_0 = 2.2787e-04
Loss = 6.2604e-01, PNorm = 65.0851, GNorm = 2.1465, lr_0 = 2.3163e-04
Loss = 5.0477e-01, PNorm = 65.1074, GNorm = 2.1410, lr_0 = 2.3538e-04
Loss = 5.9455e-01, PNorm = 65.1255, GNorm = 2.4517, lr_0 = 2.3913e-04
Loss = 6.0084e-01, PNorm = 65.1451, GNorm = 1.4990, lr_0 = 2.4288e-04
Loss = 5.4023e-01, PNorm = 65.1641, GNorm = 1.8327, lr_0 = 2.4662e-04
Loss = 5.2357e-01, PNorm = 65.1834, GNorm = 1.6477, lr_0 = 2.5038e-04
Loss = 5.8993e-01, PNorm = 65.2063, GNorm = 2.2152, lr_0 = 2.5413e-04
Loss = 5.5365e-01, PNorm = 65.2274, GNorm = 1.7975, lr_0 = 2.5788e-04
Loss = 5.9310e-01, PNorm = 65.2492, GNorm = 1.7984, lr_0 = 2.6163e-04
Loss = 5.2305e-01, PNorm = 65.2719, GNorm = 1.3691, lr_0 = 2.6537e-04
Loss = 5.3280e-01, PNorm = 65.2959, GNorm = 2.0172, lr_0 = 2.6912e-04
Loss = 5.3317e-01, PNorm = 65.3213, GNorm = 1.5366, lr_0 = 2.7288e-04
Loss = 5.8568e-01, PNorm = 65.3488, GNorm = 2.1110, lr_0 = 2.7663e-04
Loss = 5.8557e-01, PNorm = 65.3762, GNorm = 1.9496, lr_0 = 2.8038e-04
Loss = 5.3276e-01, PNorm = 65.3992, GNorm = 1.5370, lr_0 = 2.8413e-04
Loss = 5.1009e-01, PNorm = 65.4238, GNorm = 1.5043, lr_0 = 2.8787e-04
Loss = 6.0968e-01, PNorm = 65.4511, GNorm = 1.6118, lr_0 = 2.9163e-04
Loss = 5.6711e-01, PNorm = 65.4814, GNorm = 2.4193, lr_0 = 2.9538e-04
Loss = 6.2657e-01, PNorm = 65.5092, GNorm = 1.7283, lr_0 = 2.9913e-04
Loss = 6.8788e-01, PNorm = 65.5407, GNorm = 3.2275, lr_0 = 3.0288e-04
Loss = 5.4032e-01, PNorm = 65.5717, GNorm = 1.6970, lr_0 = 3.0662e-04
Loss = 4.9565e-01, PNorm = 65.6001, GNorm = 1.2949, lr_0 = 3.1037e-04
Loss = 5.4220e-01, PNorm = 65.6337, GNorm = 1.7152, lr_0 = 3.1413e-04
Loss = 4.6417e-01, PNorm = 65.6601, GNorm = 1.5291, lr_0 = 3.1788e-04
Loss = 4.7833e-01, PNorm = 65.6856, GNorm = 1.3574, lr_0 = 3.2163e-04
Loss = 4.9908e-01, PNorm = 65.7135, GNorm = 1.5362, lr_0 = 3.2538e-04
Loss = 5.1003e-01, PNorm = 65.7426, GNorm = 1.3663, lr_0 = 3.2912e-04
Loss = 4.8353e-01, PNorm = 65.7723, GNorm = 1.9880, lr_0 = 3.3288e-04
Loss = 4.7273e-01, PNorm = 65.8022, GNorm = 1.8603, lr_0 = 3.3663e-04
Loss = 5.2546e-01, PNorm = 65.8334, GNorm = 1.3951, lr_0 = 3.4038e-04
Loss = 5.3361e-01, PNorm = 65.8648, GNorm = 1.6068, lr_0 = 3.4413e-04
Loss = 4.4954e-01, PNorm = 65.9002, GNorm = 1.3881, lr_0 = 3.4787e-04
Loss = 5.3905e-01, PNorm = 65.9309, GNorm = 2.1720, lr_0 = 3.5162e-04
Loss = 5.7074e-01, PNorm = 65.9626, GNorm = 1.3362, lr_0 = 3.5538e-04
Loss = 5.4683e-01, PNorm = 65.9983, GNorm = 1.5736, lr_0 = 3.5913e-04
Loss = 5.2388e-01, PNorm = 66.0305, GNorm = 1.5038, lr_0 = 3.6288e-04
Loss = 4.6516e-01, PNorm = 66.0680, GNorm = 1.7101, lr_0 = 3.6662e-04
Loss = 5.0969e-01, PNorm = 66.1018, GNorm = 1.7544, lr_0 = 3.7037e-04
Loss = 4.9568e-01, PNorm = 66.1341, GNorm = 1.2612, lr_0 = 3.7413e-04
Loss = 4.8039e-01, PNorm = 66.1701, GNorm = 1.1203, lr_0 = 3.7788e-04
Loss = 5.7302e-01, PNorm = 66.2064, GNorm = 1.5447, lr_0 = 3.8163e-04
Loss = 5.2422e-01, PNorm = 66.2464, GNorm = 1.1682, lr_0 = 3.8537e-04
Loss = 5.9732e-01, PNorm = 66.2869, GNorm = 2.0807, lr_0 = 3.8912e-04
Loss = 5.2986e-01, PNorm = 66.3333, GNorm = 2.0899, lr_0 = 3.9287e-04
Loss = 5.1243e-01, PNorm = 66.3771, GNorm = 1.3858, lr_0 = 3.9663e-04
Loss = 4.7335e-01, PNorm = 66.4127, GNorm = 1.1623, lr_0 = 4.0038e-04
Loss = 5.0517e-01, PNorm = 66.4615, GNorm = 1.3997, lr_0 = 4.0413e-04
Loss = 5.3058e-01, PNorm = 66.5002, GNorm = 1.5192, lr_0 = 4.0787e-04
Loss = 5.2481e-01, PNorm = 66.5435, GNorm = 1.4323, lr_0 = 4.1162e-04
Loss = 5.0874e-01, PNorm = 66.5885, GNorm = 1.1784, lr_0 = 4.1537e-04
Loss = 4.8044e-01, PNorm = 66.6307, GNorm = 0.9977, lr_0 = 4.1913e-04
Loss = 4.6742e-01, PNorm = 66.6723, GNorm = 1.1248, lr_0 = 4.2288e-04
Loss = 5.6289e-01, PNorm = 66.7126, GNorm = 1.7514, lr_0 = 4.2662e-04
Loss = 4.5666e-01, PNorm = 66.7655, GNorm = 1.3657, lr_0 = 4.3037e-04
Loss = 5.4728e-01, PNorm = 66.8127, GNorm = 1.4407, lr_0 = 4.3412e-04
Loss = 5.2736e-01, PNorm = 66.8658, GNorm = 1.7906, lr_0 = 4.3788e-04
Loss = 5.0333e-01, PNorm = 66.9147, GNorm = 1.1918, lr_0 = 4.4163e-04
Loss = 5.0942e-01, PNorm = 66.9620, GNorm = 1.3130, lr_0 = 4.4538e-04
Loss = 5.1351e-01, PNorm = 67.0095, GNorm = 1.0187, lr_0 = 4.4912e-04
Loss = 4.7420e-01, PNorm = 67.0585, GNorm = 1.0935, lr_0 = 4.5287e-04
Loss = 5.5152e-01, PNorm = 67.1108, GNorm = 2.4403, lr_0 = 4.5662e-04
Loss = 5.0156e-01, PNorm = 67.1618, GNorm = 1.2620, lr_0 = 4.6038e-04
Loss = 4.8391e-01, PNorm = 67.2174, GNorm = 1.4336, lr_0 = 4.6413e-04
Loss = 5.0138e-01, PNorm = 67.2648, GNorm = 1.1614, lr_0 = 4.6787e-04
Loss = 4.2471e-01, PNorm = 67.3178, GNorm = 1.1282, lr_0 = 4.7162e-04
Loss = 5.1594e-01, PNorm = 67.3708, GNorm = 1.3483, lr_0 = 4.7537e-04
Loss = 5.6730e-01, PNorm = 67.4257, GNorm = 1.4988, lr_0 = 4.7913e-04
Loss = 5.2422e-01, PNorm = 67.4807, GNorm = 1.5396, lr_0 = 4.8288e-04
Loss = 5.0189e-01, PNorm = 67.5347, GNorm = 1.6250, lr_0 = 4.8663e-04
Loss = 4.4418e-01, PNorm = 67.5925, GNorm = 1.2407, lr_0 = 4.9038e-04
Loss = 6.1274e-01, PNorm = 67.6417, GNorm = 2.4650, lr_0 = 4.9412e-04
Loss = 5.6665e-01, PNorm = 67.7018, GNorm = 1.1590, lr_0 = 4.9788e-04
Loss = 5.2989e-01, PNorm = 67.7609, GNorm = 1.1360, lr_0 = 5.0163e-04
Loss = 5.2387e-01, PNorm = 67.8217, GNorm = 1.3826, lr_0 = 5.0538e-04
Loss = 4.7514e-01, PNorm = 67.8792, GNorm = 1.1004, lr_0 = 5.0913e-04
Loss = 4.8590e-01, PNorm = 67.9391, GNorm = 1.7915, lr_0 = 5.1287e-04
Loss = 4.6441e-01, PNorm = 67.9947, GNorm = 0.9948, lr_0 = 5.1663e-04
Loss = 4.3257e-01, PNorm = 68.0477, GNorm = 1.3794, lr_0 = 5.2038e-04
Loss = 4.9018e-01, PNorm = 68.1047, GNorm = 1.1412, lr_0 = 5.2413e-04
Loss = 4.8039e-01, PNorm = 68.1716, GNorm = 1.1827, lr_0 = 5.2788e-04
Loss = 5.0342e-01, PNorm = 68.2443, GNorm = 1.4978, lr_0 = 5.3162e-04
Loss = 4.9470e-01, PNorm = 68.3132, GNorm = 1.3301, lr_0 = 5.3538e-04
Loss = 4.5675e-01, PNorm = 68.3781, GNorm = 1.5749, lr_0 = 5.3912e-04
Loss = 5.3511e-01, PNorm = 68.4482, GNorm = 1.3062, lr_0 = 5.4288e-04
Loss = 4.9148e-01, PNorm = 68.5125, GNorm = 1.2876, lr_0 = 5.4663e-04
Loss = 4.6816e-01, PNorm = 68.5798, GNorm = 1.8748, lr_0 = 5.5038e-04
Validation mae = 0.129831
Epoch 1
Loss = 3.8144e-01, PNorm = 68.6524, GNorm = 1.0217, lr_0 = 5.5413e-04
Loss = 3.7985e-01, PNorm = 68.7179, GNorm = 1.1477, lr_0 = 5.5787e-04
Loss = 3.7382e-01, PNorm = 68.7986, GNorm = 1.1347, lr_0 = 5.6163e-04
Loss = 4.6560e-01, PNorm = 68.8701, GNorm = 1.4485, lr_0 = 5.6538e-04
Loss = 4.1370e-01, PNorm = 68.9579, GNorm = 1.1617, lr_0 = 5.6913e-04
Loss = 4.0204e-01, PNorm = 69.0437, GNorm = 1.1939, lr_0 = 5.7288e-04
Loss = 4.5438e-01, PNorm = 69.1330, GNorm = 1.1438, lr_0 = 5.7662e-04
Loss = 4.0127e-01, PNorm = 69.2178, GNorm = 1.5440, lr_0 = 5.8038e-04
Loss = 3.6141e-01, PNorm = 69.3025, GNorm = 0.9829, lr_0 = 5.8413e-04
Loss = 3.4151e-01, PNorm = 69.3951, GNorm = 1.0121, lr_0 = 5.8788e-04
Loss = 3.8571e-01, PNorm = 69.4747, GNorm = 1.2231, lr_0 = 5.9163e-04
Loss = 4.0286e-01, PNorm = 69.5657, GNorm = 1.3059, lr_0 = 5.9538e-04
Loss = 3.9588e-01, PNorm = 69.6612, GNorm = 1.2563, lr_0 = 5.9913e-04
Loss = 3.7470e-01, PNorm = 69.7525, GNorm = 1.1438, lr_0 = 6.0288e-04
Loss = 3.5746e-01, PNorm = 69.8564, GNorm = 1.0239, lr_0 = 6.0663e-04
Loss = 4.1048e-01, PNorm = 69.9646, GNorm = 1.1777, lr_0 = 6.1038e-04
Loss = 3.4541e-01, PNorm = 70.0717, GNorm = 1.0234, lr_0 = 6.1413e-04
Loss = 3.7253e-01, PNorm = 70.1848, GNorm = 1.4705, lr_0 = 6.1788e-04
Loss = 3.6811e-01, PNorm = 70.3048, GNorm = 1.4039, lr_0 = 6.2163e-04
Loss = 3.9594e-01, PNorm = 70.4252, GNorm = 1.3532, lr_0 = 6.2538e-04
Loss = 3.9884e-01, PNorm = 70.5442, GNorm = 0.9646, lr_0 = 6.2913e-04
Loss = 4.1809e-01, PNorm = 70.6659, GNorm = 1.4380, lr_0 = 6.3288e-04
Loss = 3.2217e-01, PNorm = 70.7837, GNorm = 1.2554, lr_0 = 6.3663e-04
Loss = 3.8709e-01, PNorm = 70.8927, GNorm = 1.3054, lr_0 = 6.4038e-04
Loss = 3.7937e-01, PNorm = 71.0038, GNorm = 1.1825, lr_0 = 6.4413e-04
Loss = 3.7395e-01, PNorm = 71.1159, GNorm = 1.0440, lr_0 = 6.4788e-04
Loss = 3.9104e-01, PNorm = 71.2198, GNorm = 1.4041, lr_0 = 6.5163e-04
Loss = 4.0307e-01, PNorm = 71.3247, GNorm = 1.4231, lr_0 = 6.5538e-04
Loss = 3.4841e-01, PNorm = 71.4316, GNorm = 1.0859, lr_0 = 6.5913e-04
Loss = 4.3445e-01, PNorm = 71.5430, GNorm = 1.4137, lr_0 = 6.6288e-04
Loss = 4.2903e-01, PNorm = 71.6586, GNorm = 1.2834, lr_0 = 6.6663e-04
Loss = 4.2174e-01, PNorm = 71.7862, GNorm = 1.5784, lr_0 = 6.7038e-04
Loss = 4.2072e-01, PNorm = 71.9251, GNorm = 1.2442, lr_0 = 6.7413e-04
Loss = 3.6476e-01, PNorm = 72.0525, GNorm = 1.6416, lr_0 = 6.7788e-04
Loss = 4.1504e-01, PNorm = 72.1728, GNorm = 1.3967, lr_0 = 6.8163e-04
Loss = 4.1256e-01, PNorm = 72.2942, GNorm = 1.2992, lr_0 = 6.8538e-04
Loss = 3.7076e-01, PNorm = 72.4122, GNorm = 1.4534, lr_0 = 6.8913e-04
Loss = 3.9235e-01, PNorm = 72.5336, GNorm = 1.5638, lr_0 = 6.9288e-04
Loss = 4.6353e-01, PNorm = 72.6560, GNorm = 1.1382, lr_0 = 6.9663e-04
Loss = 4.7399e-01, PNorm = 72.7887, GNorm = 1.2973, lr_0 = 7.0038e-04
Loss = 4.2836e-01, PNorm = 72.9147, GNorm = 0.9958, lr_0 = 7.0413e-04
Loss = 4.2647e-01, PNorm = 73.0495, GNorm = 1.6016, lr_0 = 7.0788e-04
Loss = 4.3420e-01, PNorm = 73.1660, GNorm = 1.8284, lr_0 = 7.1163e-04
Loss = 4.4295e-01, PNorm = 73.3114, GNorm = 1.5200, lr_0 = 7.1538e-04
Loss = 4.8340e-01, PNorm = 73.4499, GNorm = 1.1614, lr_0 = 7.1913e-04
Loss = 4.4035e-01, PNorm = 73.5979, GNorm = 1.2706, lr_0 = 7.2288e-04
Loss = 4.2516e-01, PNorm = 73.7434, GNorm = 1.1402, lr_0 = 7.2663e-04
Loss = 4.4646e-01, PNorm = 73.8790, GNorm = 1.5949, lr_0 = 7.3038e-04
Loss = 4.5146e-01, PNorm = 74.0129, GNorm = 1.1283, lr_0 = 7.3413e-04
Loss = 4.1257e-01, PNorm = 74.1627, GNorm = 1.0547, lr_0 = 7.3788e-04
Loss = 3.9648e-01, PNorm = 74.3028, GNorm = 1.1050, lr_0 = 7.4163e-04
Loss = 3.7747e-01, PNorm = 74.4292, GNorm = 0.7599, lr_0 = 7.4538e-04
Loss = 4.2933e-01, PNorm = 74.5567, GNorm = 1.0442, lr_0 = 7.4913e-04
Loss = 4.5229e-01, PNorm = 74.6839, GNorm = 1.4203, lr_0 = 7.5288e-04
Loss = 4.5650e-01, PNorm = 74.8222, GNorm = 1.1613, lr_0 = 7.5663e-04
Loss = 4.4315e-01, PNorm = 74.9699, GNorm = 1.0485, lr_0 = 7.6038e-04
Loss = 4.2151e-01, PNorm = 75.1123, GNorm = 0.9650, lr_0 = 7.6413e-04
Loss = 4.9741e-01, PNorm = 75.2570, GNorm = 1.3903, lr_0 = 7.6788e-04
Loss = 3.9926e-01, PNorm = 75.3874, GNorm = 1.0943, lr_0 = 7.7163e-04
Loss = 4.4962e-01, PNorm = 75.5136, GNorm = 1.4122, lr_0 = 7.7538e-04
Loss = 4.4970e-01, PNorm = 75.6373, GNorm = 1.5747, lr_0 = 7.7913e-04
Loss = 4.2705e-01, PNorm = 75.7577, GNorm = 1.0590, lr_0 = 7.8288e-04
Loss = 4.1285e-01, PNorm = 75.8751, GNorm = 1.1623, lr_0 = 7.8663e-04
Loss = 4.7845e-01, PNorm = 76.0056, GNorm = 1.1695, lr_0 = 7.9038e-04
Loss = 4.2945e-01, PNorm = 76.1432, GNorm = 1.2405, lr_0 = 7.9413e-04
Loss = 3.7593e-01, PNorm = 76.2757, GNorm = 1.0182, lr_0 = 7.9788e-04
Loss = 4.2353e-01, PNorm = 76.4032, GNorm = 1.4364, lr_0 = 8.0163e-04
Loss = 4.2554e-01, PNorm = 76.5326, GNorm = 1.0834, lr_0 = 8.0538e-04
Loss = 3.7896e-01, PNorm = 76.6660, GNorm = 1.2016, lr_0 = 8.0913e-04
Loss = 4.1905e-01, PNorm = 76.7957, GNorm = 1.1082, lr_0 = 8.1288e-04
Loss = 4.4804e-01, PNorm = 76.9384, GNorm = 0.9434, lr_0 = 8.1663e-04
Loss = 4.3101e-01, PNorm = 77.0768, GNorm = 1.0586, lr_0 = 8.2038e-04
Loss = 4.4265e-01, PNorm = 77.2158, GNorm = 1.0966, lr_0 = 8.2413e-04
Loss = 4.4375e-01, PNorm = 77.3575, GNorm = 1.0457, lr_0 = 8.2788e-04
Loss = 4.3158e-01, PNorm = 77.4980, GNorm = 1.2409, lr_0 = 8.3163e-04
Loss = 4.3396e-01, PNorm = 77.6416, GNorm = 1.2875, lr_0 = 8.3538e-04
Loss = 4.2502e-01, PNorm = 77.7689, GNorm = 0.9526, lr_0 = 8.3913e-04
Loss = 4.5300e-01, PNorm = 77.9165, GNorm = 0.9750, lr_0 = 8.4288e-04
Loss = 3.9931e-01, PNorm = 78.0669, GNorm = 0.9271, lr_0 = 8.4663e-04
Loss = 4.8592e-01, PNorm = 78.2258, GNorm = 1.5271, lr_0 = 8.5038e-04
Loss = 4.7716e-01, PNorm = 78.3935, GNorm = 1.1087, lr_0 = 8.5413e-04
Loss = 4.3304e-01, PNorm = 78.5471, GNorm = 0.9789, lr_0 = 8.5788e-04
Loss = 4.3750e-01, PNorm = 78.7043, GNorm = 1.7257, lr_0 = 8.6163e-04
Loss = 4.3397e-01, PNorm = 78.8505, GNorm = 1.1833, lr_0 = 8.6538e-04
Loss = 3.7887e-01, PNorm = 78.9907, GNorm = 1.3974, lr_0 = 8.6913e-04
Loss = 4.4168e-01, PNorm = 79.1275, GNorm = 1.1660, lr_0 = 8.7288e-04
Loss = 4.0683e-01, PNorm = 79.2731, GNorm = 1.5795, lr_0 = 8.7663e-04
Loss = 4.3730e-01, PNorm = 79.4057, GNorm = 1.5211, lr_0 = 8.8038e-04
Loss = 3.8936e-01, PNorm = 79.5369, GNorm = 1.5399, lr_0 = 8.8413e-04
Loss = 4.1919e-01, PNorm = 79.6777, GNorm = 1.1006, lr_0 = 8.8788e-04
Loss = 4.2658e-01, PNorm = 79.8250, GNorm = 1.6649, lr_0 = 8.9163e-04
Loss = 3.5408e-01, PNorm = 79.9689, GNorm = 0.9712, lr_0 = 8.9538e-04
Loss = 4.0762e-01, PNorm = 80.1026, GNorm = 0.9607, lr_0 = 8.9913e-04
Loss = 4.3316e-01, PNorm = 80.2470, GNorm = 0.8572, lr_0 = 9.0288e-04
Loss = 4.2904e-01, PNorm = 80.3948, GNorm = 1.0271, lr_0 = 9.0663e-04
Loss = 4.7812e-01, PNorm = 80.5449, GNorm = 0.9657, lr_0 = 9.1038e-04
Loss = 4.8590e-01, PNorm = 80.6984, GNorm = 2.1558, lr_0 = 9.1413e-04
Loss = 4.3272e-01, PNorm = 80.8623, GNorm = 1.8108, lr_0 = 9.1788e-04
Loss = 4.6591e-01, PNorm = 81.0360, GNorm = 1.1096, lr_0 = 9.2163e-04
Loss = 4.9533e-01, PNorm = 81.2121, GNorm = 1.3170, lr_0 = 9.2538e-04
Loss = 4.0572e-01, PNorm = 81.3780, GNorm = 1.5425, lr_0 = 9.2913e-04
Loss = 4.2223e-01, PNorm = 81.5411, GNorm = 1.1353, lr_0 = 9.3288e-04
Loss = 4.2249e-01, PNorm = 81.6865, GNorm = 1.2627, lr_0 = 9.3663e-04
Loss = 3.6675e-01, PNorm = 81.8218, GNorm = 0.9504, lr_0 = 9.4038e-04
Loss = 4.1045e-01, PNorm = 81.9576, GNorm = 1.4143, lr_0 = 9.4413e-04
Loss = 3.9759e-01, PNorm = 82.0954, GNorm = 0.7540, lr_0 = 9.4788e-04
Loss = 4.3361e-01, PNorm = 82.2630, GNorm = 0.8956, lr_0 = 9.5163e-04
Loss = 4.9890e-01, PNorm = 82.4177, GNorm = 1.2116, lr_0 = 9.5538e-04
Loss = 4.5074e-01, PNorm = 82.5902, GNorm = 1.1543, lr_0 = 9.5913e-04
Loss = 4.4307e-01, PNorm = 82.7444, GNorm = 1.1166, lr_0 = 9.6288e-04
Loss = 4.5864e-01, PNorm = 82.8877, GNorm = 0.7311, lr_0 = 9.6663e-04
Loss = 4.1542e-01, PNorm = 83.0418, GNorm = 1.1418, lr_0 = 9.7038e-04
Loss = 4.1946e-01, PNorm = 83.1930, GNorm = 1.0536, lr_0 = 9.7413e-04
Loss = 4.0546e-01, PNorm = 83.3519, GNorm = 0.9147, lr_0 = 9.7788e-04
Loss = 4.1684e-01, PNorm = 83.5043, GNorm = 1.2221, lr_0 = 9.8163e-04
Loss = 4.2606e-01, PNorm = 83.6774, GNorm = 1.4425, lr_0 = 9.8537e-04
Loss = 4.3187e-01, PNorm = 83.8210, GNorm = 1.0600, lr_0 = 9.8912e-04
Loss = 4.2733e-01, PNorm = 83.9858, GNorm = 0.9574, lr_0 = 9.9288e-04
Loss = 4.1230e-01, PNorm = 84.1368, GNorm = 0.8100, lr_0 = 9.9663e-04
Loss = 4.1382e-01, PNorm = 84.2948, GNorm = 1.0525, lr_0 = 9.9993e-04
Validation mae = 0.127641
Epoch 2
Loss = 3.4378e-01, PNorm = 84.4628, GNorm = 0.9750, lr_0 = 9.9925e-04
Loss = 2.6705e-01, PNorm = 84.6264, GNorm = 0.7276, lr_0 = 9.9856e-04
Loss = 2.3266e-01, PNorm = 84.7717, GNorm = 0.8656, lr_0 = 9.9788e-04
Loss = 2.5760e-01, PNorm = 84.9130, GNorm = 0.8791, lr_0 = 9.9719e-04
Loss = 2.8172e-01, PNorm = 85.0702, GNorm = 1.0373, lr_0 = 9.9651e-04
Loss = 2.6324e-01, PNorm = 85.2367, GNorm = 1.2865, lr_0 = 9.9583e-04
Loss = 2.4019e-01, PNorm = 85.3941, GNorm = 0.8789, lr_0 = 9.9515e-04
Loss = 2.4493e-01, PNorm = 85.5472, GNorm = 1.3212, lr_0 = 9.9446e-04
Loss = 2.9042e-01, PNorm = 85.7000, GNorm = 0.9471, lr_0 = 9.9378e-04
Loss = 2.8578e-01, PNorm = 85.8586, GNorm = 0.8391, lr_0 = 9.9310e-04
Loss = 2.6460e-01, PNorm = 86.0369, GNorm = 1.1330, lr_0 = 9.9242e-04
Loss = 2.6517e-01, PNorm = 86.2075, GNorm = 1.1098, lr_0 = 9.9174e-04
Loss = 3.0433e-01, PNorm = 86.3918, GNorm = 1.0153, lr_0 = 9.9106e-04
Loss = 2.6723e-01, PNorm = 86.5648, GNorm = 0.8774, lr_0 = 9.9038e-04
Loss = 3.1766e-01, PNorm = 86.7369, GNorm = 1.6044, lr_0 = 9.8971e-04
Loss = 2.8365e-01, PNorm = 86.9052, GNorm = 0.8718, lr_0 = 9.8903e-04
Loss = 2.8220e-01, PNorm = 87.0566, GNorm = 0.7951, lr_0 = 9.8835e-04
Loss = 2.6609e-01, PNorm = 87.1992, GNorm = 0.7513, lr_0 = 9.8767e-04
Loss = 2.5402e-01, PNorm = 87.3530, GNorm = 0.8234, lr_0 = 9.8700e-04
Loss = 2.7467e-01, PNorm = 87.4954, GNorm = 0.9038, lr_0 = 9.8632e-04
Loss = 3.3214e-01, PNorm = 87.6652, GNorm = 1.1989, lr_0 = 9.8564e-04
Loss = 2.7686e-01, PNorm = 87.8522, GNorm = 0.9841, lr_0 = 9.8497e-04
Loss = 2.5979e-01, PNorm = 88.0331, GNorm = 1.0720, lr_0 = 9.8429e-04
Loss = 2.7937e-01, PNorm = 88.2181, GNorm = 2.2019, lr_0 = 9.8362e-04
Loss = 3.2861e-01, PNorm = 88.3940, GNorm = 0.8732, lr_0 = 9.8295e-04
Loss = 2.5835e-01, PNorm = 88.5841, GNorm = 1.0372, lr_0 = 9.8227e-04
Loss = 2.9504e-01, PNorm = 88.7572, GNorm = 0.8223, lr_0 = 9.8160e-04
Loss = 3.0149e-01, PNorm = 88.9362, GNorm = 1.0060, lr_0 = 9.8093e-04
Loss = 2.7027e-01, PNorm = 89.1165, GNorm = 0.9695, lr_0 = 9.8026e-04
Loss = 2.8004e-01, PNorm = 89.2939, GNorm = 1.0720, lr_0 = 9.7958e-04
Loss = 3.0732e-01, PNorm = 89.4642, GNorm = 1.0728, lr_0 = 9.7891e-04
Loss = 2.9965e-01, PNorm = 89.6662, GNorm = 1.0638, lr_0 = 9.7824e-04
Loss = 3.2914e-01, PNorm = 89.8553, GNorm = 1.6213, lr_0 = 9.7757e-04
Loss = 3.4169e-01, PNorm = 90.0428, GNorm = 0.9120, lr_0 = 9.7690e-04
Loss = 3.3163e-01, PNorm = 90.2310, GNorm = 2.1323, lr_0 = 9.7623e-04
Loss = 2.6324e-01, PNorm = 90.3999, GNorm = 0.7590, lr_0 = 9.7556e-04
Loss = 3.0244e-01, PNorm = 90.5666, GNorm = 0.6572, lr_0 = 9.7490e-04
Loss = 3.4123e-01, PNorm = 90.7339, GNorm = 0.7587, lr_0 = 9.7423e-04
Loss = 3.4299e-01, PNorm = 90.9199, GNorm = 1.3249, lr_0 = 9.7356e-04
Loss = 2.9995e-01, PNorm = 91.0969, GNorm = 0.8661, lr_0 = 9.7289e-04
Loss = 2.5021e-01, PNorm = 91.2920, GNorm = 1.0201, lr_0 = 9.7223e-04
Loss = 2.7629e-01, PNorm = 91.4524, GNorm = 1.1922, lr_0 = 9.7156e-04
Loss = 3.0135e-01, PNorm = 91.6225, GNorm = 1.1694, lr_0 = 9.7090e-04
Loss = 3.0995e-01, PNorm = 91.7997, GNorm = 1.0384, lr_0 = 9.7023e-04
Loss = 3.2113e-01, PNorm = 91.9792, GNorm = 0.8080, lr_0 = 9.6957e-04
Loss = 3.4222e-01, PNorm = 92.1692, GNorm = 0.9900, lr_0 = 9.6890e-04
Loss = 2.7617e-01, PNorm = 92.3623, GNorm = 0.9306, lr_0 = 9.6824e-04
Loss = 3.0364e-01, PNorm = 92.5508, GNorm = 0.8181, lr_0 = 9.6757e-04
Loss = 3.1501e-01, PNorm = 92.7356, GNorm = 1.0284, lr_0 = 9.6691e-04
Loss = 3.2775e-01, PNorm = 92.9145, GNorm = 1.4212, lr_0 = 9.6625e-04
Loss = 3.2307e-01, PNorm = 93.1021, GNorm = 0.8674, lr_0 = 9.6559e-04
Loss = 3.1724e-01, PNorm = 93.2951, GNorm = 1.3255, lr_0 = 9.6493e-04
Loss = 2.9598e-01, PNorm = 93.4790, GNorm = 0.8412, lr_0 = 9.6427e-04
Loss = 3.8732e-01, PNorm = 93.6648, GNorm = 1.0438, lr_0 = 9.6360e-04
Loss = 3.1186e-01, PNorm = 93.8589, GNorm = 1.2552, lr_0 = 9.6294e-04
Loss = 3.5897e-01, PNorm = 94.0354, GNorm = 1.6877, lr_0 = 9.6228e-04
Loss = 3.1763e-01, PNorm = 94.2113, GNorm = 1.8158, lr_0 = 9.6163e-04
Loss = 3.2551e-01, PNorm = 94.3837, GNorm = 1.3608, lr_0 = 9.6097e-04
Loss = 3.4381e-01, PNorm = 94.5634, GNorm = 1.5433, lr_0 = 9.6031e-04
Loss = 3.4190e-01, PNorm = 94.7314, GNorm = 1.3088, lr_0 = 9.5965e-04
Loss = 3.3438e-01, PNorm = 94.9077, GNorm = 0.8234, lr_0 = 9.5899e-04
Loss = 2.7573e-01, PNorm = 95.0643, GNorm = 0.8914, lr_0 = 9.5834e-04
Loss = 3.6435e-01, PNorm = 95.2186, GNorm = 0.7614, lr_0 = 9.5768e-04
Loss = 3.2106e-01, PNorm = 95.3723, GNorm = 0.9223, lr_0 = 9.5702e-04
Loss = 3.9216e-01, PNorm = 95.5419, GNorm = 0.8930, lr_0 = 9.5637e-04
Loss = 3.2810e-01, PNorm = 95.7193, GNorm = 1.2560, lr_0 = 9.5571e-04
Loss = 3.7068e-01, PNorm = 95.9018, GNorm = 0.7037, lr_0 = 9.5506e-04
Loss = 3.9389e-01, PNorm = 96.0954, GNorm = 1.9475, lr_0 = 9.5440e-04
Loss = 3.4414e-01, PNorm = 96.2615, GNorm = 1.4601, lr_0 = 9.5375e-04
Loss = 3.8101e-01, PNorm = 96.4367, GNorm = 1.0911, lr_0 = 9.5310e-04
Loss = 3.6566e-01, PNorm = 96.6242, GNorm = 1.4734, lr_0 = 9.5244e-04
Loss = 3.2266e-01, PNorm = 96.8009, GNorm = 1.0251, lr_0 = 9.5179e-04
Loss = 2.8609e-01, PNorm = 96.9627, GNorm = 1.0519, lr_0 = 9.5114e-04
Loss = 3.0273e-01, PNorm = 97.1195, GNorm = 1.4555, lr_0 = 9.5049e-04
Loss = 3.1025e-01, PNorm = 97.2744, GNorm = 1.2198, lr_0 = 9.4984e-04
Loss = 2.7913e-01, PNorm = 97.4194, GNorm = 0.9842, lr_0 = 9.4919e-04
Loss = 3.6625e-01, PNorm = 97.5874, GNorm = 0.9486, lr_0 = 9.4854e-04
Loss = 2.7013e-01, PNorm = 97.7542, GNorm = 1.1105, lr_0 = 9.4789e-04
Loss = 3.0651e-01, PNorm = 97.9101, GNorm = 1.5142, lr_0 = 9.4724e-04
Loss = 2.6827e-01, PNorm = 98.0550, GNorm = 1.3631, lr_0 = 9.4659e-04
Loss = 3.0977e-01, PNorm = 98.2114, GNorm = 0.9772, lr_0 = 9.4594e-04
Loss = 3.4924e-01, PNorm = 98.3757, GNorm = 0.7694, lr_0 = 9.4529e-04
Loss = 3.4160e-01, PNorm = 98.5449, GNorm = 1.5362, lr_0 = 9.4464e-04
Loss = 3.0478e-01, PNorm = 98.6902, GNorm = 0.8151, lr_0 = 9.4400e-04
Loss = 2.9693e-01, PNorm = 98.8478, GNorm = 0.7770, lr_0 = 9.4335e-04
Loss = 2.7150e-01, PNorm = 98.9836, GNorm = 0.9358, lr_0 = 9.4270e-04
Loss = 3.1356e-01, PNorm = 99.1201, GNorm = 2.0170, lr_0 = 9.4206e-04
Loss = 3.4763e-01, PNorm = 99.2587, GNorm = 0.8221, lr_0 = 9.4141e-04
Loss = 4.0803e-01, PNorm = 99.4173, GNorm = 0.7390, lr_0 = 9.4077e-04
Loss = 3.0787e-01, PNorm = 99.5615, GNorm = 1.8165, lr_0 = 9.4012e-04
Loss = 3.5234e-01, PNorm = 99.7103, GNorm = 1.1717, lr_0 = 9.3948e-04
Loss = 3.5123e-01, PNorm = 99.8658, GNorm = 0.9720, lr_0 = 9.3884e-04
Loss = 3.4865e-01, PNorm = 100.0187, GNorm = 1.5529, lr_0 = 9.3819e-04
Loss = 3.2899e-01, PNorm = 100.1733, GNorm = 0.9763, lr_0 = 9.3755e-04
Loss = 3.3725e-01, PNorm = 100.3174, GNorm = 0.8037, lr_0 = 9.3691e-04
Loss = 3.4862e-01, PNorm = 100.4639, GNorm = 1.0046, lr_0 = 9.3627e-04
Loss = 3.0869e-01, PNorm = 100.5992, GNorm = 0.7881, lr_0 = 9.3562e-04
Loss = 3.4027e-01, PNorm = 100.7307, GNorm = 0.9070, lr_0 = 9.3498e-04
Loss = 3.3013e-01, PNorm = 100.8821, GNorm = 1.1134, lr_0 = 9.3434e-04
Loss = 3.5925e-01, PNorm = 101.0233, GNorm = 1.2046, lr_0 = 9.3370e-04
Loss = 3.0547e-01, PNorm = 101.1894, GNorm = 1.0659, lr_0 = 9.3306e-04
Loss = 3.2495e-01, PNorm = 101.3421, GNorm = 0.9792, lr_0 = 9.3242e-04
Loss = 3.5601e-01, PNorm = 101.4944, GNorm = 1.1557, lr_0 = 9.3178e-04
Loss = 3.2387e-01, PNorm = 101.6566, GNorm = 1.3546, lr_0 = 9.3115e-04
Loss = 3.3831e-01, PNorm = 101.8086, GNorm = 0.9860, lr_0 = 9.3051e-04
Loss = 2.9690e-01, PNorm = 101.9487, GNorm = 0.8400, lr_0 = 9.2987e-04
Loss = 2.6280e-01, PNorm = 102.0773, GNorm = 1.1047, lr_0 = 9.2923e-04
Loss = 3.3094e-01, PNorm = 102.2129, GNorm = 0.8281, lr_0 = 9.2860e-04
Loss = 3.2367e-01, PNorm = 102.3599, GNorm = 1.1322, lr_0 = 9.2796e-04
Loss = 3.2666e-01, PNorm = 102.5129, GNorm = 0.9352, lr_0 = 9.2733e-04
Loss = 3.6387e-01, PNorm = 102.6734, GNorm = 1.4683, lr_0 = 9.2669e-04
Loss = 3.1354e-01, PNorm = 102.8188, GNorm = 0.9863, lr_0 = 9.2606e-04
Loss = 3.6111e-01, PNorm = 102.9691, GNorm = 0.8279, lr_0 = 9.2542e-04
Loss = 3.4109e-01, PNorm = 103.1294, GNorm = 1.2768, lr_0 = 9.2479e-04
Loss = 2.8630e-01, PNorm = 103.2758, GNorm = 0.8815, lr_0 = 9.2415e-04
Loss = 3.6827e-01, PNorm = 103.4108, GNorm = 1.7167, lr_0 = 9.2352e-04
Loss = 3.6170e-01, PNorm = 103.5621, GNorm = 1.9151, lr_0 = 9.2289e-04
Loss = 3.3757e-01, PNorm = 103.7137, GNorm = 1.1325, lr_0 = 9.2226e-04
Loss = 3.2064e-01, PNorm = 103.8653, GNorm = 0.8137, lr_0 = 9.2162e-04
Loss = 2.8629e-01, PNorm = 104.0081, GNorm = 0.9483, lr_0 = 9.2099e-04
Validation mae = 0.124838
Epoch 3
Loss = 1.8636e-01, PNorm = 104.1408, GNorm = 0.8955, lr_0 = 9.2036e-04
Loss = 1.8272e-01, PNorm = 104.2624, GNorm = 0.8603, lr_0 = 9.1973e-04
Loss = 1.6662e-01, PNorm = 104.3560, GNorm = 0.6187, lr_0 = 9.1910e-04
Loss = 1.4608e-01, PNorm = 104.4461, GNorm = 0.7519, lr_0 = 9.1847e-04
Loss = 1.6400e-01, PNorm = 104.5293, GNorm = 0.5845, lr_0 = 9.1784e-04
Loss = 1.9344e-01, PNorm = 104.6201, GNorm = 1.8197, lr_0 = 9.1721e-04
Loss = 1.6662e-01, PNorm = 104.7175, GNorm = 0.8002, lr_0 = 9.1658e-04
Loss = 1.6925e-01, PNorm = 104.8115, GNorm = 0.8938, lr_0 = 9.1596e-04
Loss = 1.8258e-01, PNorm = 104.9095, GNorm = 1.3072, lr_0 = 9.1533e-04
Loss = 1.7690e-01, PNorm = 105.0138, GNorm = 0.9072, lr_0 = 9.1470e-04
Loss = 1.5425e-01, PNorm = 105.1093, GNorm = 0.9370, lr_0 = 9.1408e-04
Loss = 2.1690e-01, PNorm = 105.2203, GNorm = 0.7478, lr_0 = 9.1345e-04
Loss = 2.0664e-01, PNorm = 105.3091, GNorm = 1.0051, lr_0 = 9.1282e-04
Loss = 1.8051e-01, PNorm = 105.4310, GNorm = 1.0315, lr_0 = 9.1220e-04
Loss = 1.8069e-01, PNorm = 105.5289, GNorm = 0.9457, lr_0 = 9.1157e-04
Loss = 1.8910e-01, PNorm = 105.6389, GNorm = 0.7806, lr_0 = 9.1095e-04
Loss = 1.7254e-01, PNorm = 105.7464, GNorm = 1.0931, lr_0 = 9.1032e-04
Loss = 1.6512e-01, PNorm = 105.8556, GNorm = 0.6520, lr_0 = 9.0970e-04
Loss = 1.6929e-01, PNorm = 105.9677, GNorm = 0.7136, lr_0 = 9.0908e-04
Loss = 2.3296e-01, PNorm = 106.0753, GNorm = 0.9931, lr_0 = 9.0846e-04
Loss = 1.9842e-01, PNorm = 106.1964, GNorm = 1.0070, lr_0 = 9.0783e-04
Loss = 2.2898e-01, PNorm = 106.3260, GNorm = 2.8683, lr_0 = 9.0721e-04
Loss = 1.9729e-01, PNorm = 106.4560, GNorm = 1.1824, lr_0 = 9.0659e-04
Loss = 1.8653e-01, PNorm = 106.5749, GNorm = 0.9296, lr_0 = 9.0597e-04
Loss = 2.0173e-01, PNorm = 106.7021, GNorm = 0.6888, lr_0 = 9.0535e-04
Loss = 1.8730e-01, PNorm = 106.8315, GNorm = 0.9059, lr_0 = 9.0473e-04
Loss = 1.8443e-01, PNorm = 106.9461, GNorm = 0.8012, lr_0 = 9.0411e-04
Loss = 1.8811e-01, PNorm = 107.0680, GNorm = 0.7770, lr_0 = 9.0349e-04
Loss = 2.0391e-01, PNorm = 107.1784, GNorm = 0.7115, lr_0 = 9.0287e-04
Loss = 2.0319e-01, PNorm = 107.2902, GNorm = 1.0369, lr_0 = 9.0225e-04
Loss = 1.8551e-01, PNorm = 107.4079, GNorm = 0.7010, lr_0 = 9.0163e-04
Loss = 1.9833e-01, PNorm = 107.5263, GNorm = 0.7496, lr_0 = 9.0102e-04
Loss = 2.0526e-01, PNorm = 107.6644, GNorm = 0.6915, lr_0 = 9.0040e-04
Loss = 1.8205e-01, PNorm = 107.7782, GNorm = 0.8824, lr_0 = 8.9978e-04
Loss = 1.9755e-01, PNorm = 107.9019, GNorm = 0.6788, lr_0 = 8.9916e-04
Loss = 2.1406e-01, PNorm = 108.0077, GNorm = 1.0590, lr_0 = 8.9855e-04
Loss = 1.8738e-01, PNorm = 108.1339, GNorm = 0.9494, lr_0 = 8.9793e-04
Loss = 2.0281e-01, PNorm = 108.2479, GNorm = 0.7180, lr_0 = 8.9732e-04
Loss = 1.9258e-01, PNorm = 108.3660, GNorm = 1.3519, lr_0 = 8.9670e-04
Loss = 1.8973e-01, PNorm = 108.4873, GNorm = 0.7240, lr_0 = 8.9609e-04
Loss = 1.9343e-01, PNorm = 108.6152, GNorm = 1.1453, lr_0 = 8.9548e-04
Loss = 1.9697e-01, PNorm = 108.7314, GNorm = 0.7004, lr_0 = 8.9486e-04
Loss = 1.9446e-01, PNorm = 108.8414, GNorm = 1.4532, lr_0 = 8.9425e-04
Loss = 2.0647e-01, PNorm = 108.9554, GNorm = 2.7553, lr_0 = 8.9364e-04
Loss = 1.9149e-01, PNorm = 109.0590, GNorm = 1.3701, lr_0 = 8.9302e-04
Loss = 1.8183e-01, PNorm = 109.1724, GNorm = 0.7703, lr_0 = 8.9241e-04
Loss = 1.7848e-01, PNorm = 109.2965, GNorm = 0.6382, lr_0 = 8.9180e-04
Loss = 2.0412e-01, PNorm = 109.4164, GNorm = 1.2993, lr_0 = 8.9119e-04
Loss = 1.8158e-01, PNorm = 109.5231, GNorm = 0.8371, lr_0 = 8.9058e-04
Loss = 1.7605e-01, PNorm = 109.6479, GNorm = 0.9665, lr_0 = 8.8997e-04
Loss = 2.0579e-01, PNorm = 109.7568, GNorm = 0.9917, lr_0 = 8.8936e-04
Loss = 2.0110e-01, PNorm = 109.8811, GNorm = 0.9671, lr_0 = 8.8875e-04
Loss = 2.1825e-01, PNorm = 110.0064, GNorm = 0.7424, lr_0 = 8.8814e-04
Loss = 2.0809e-01, PNorm = 110.1288, GNorm = 0.7972, lr_0 = 8.8753e-04
Loss = 1.9954e-01, PNorm = 110.2493, GNorm = 0.6436, lr_0 = 8.8693e-04
Loss = 1.7878e-01, PNorm = 110.3797, GNorm = 0.8382, lr_0 = 8.8632e-04
Loss = 1.9122e-01, PNorm = 110.4887, GNorm = 1.0083, lr_0 = 8.8571e-04
Loss = 2.0601e-01, PNorm = 110.6107, GNorm = 0.8548, lr_0 = 8.8510e-04
Loss = 2.1297e-01, PNorm = 110.7336, GNorm = 1.2650, lr_0 = 8.8450e-04
Loss = 1.9724e-01, PNorm = 110.8660, GNorm = 1.0839, lr_0 = 8.8389e-04
Loss = 1.9447e-01, PNorm = 110.9898, GNorm = 0.9188, lr_0 = 8.8329e-04
Loss = 1.7864e-01, PNorm = 111.1235, GNorm = 0.7293, lr_0 = 8.8268e-04
Loss = 2.0303e-01, PNorm = 111.2489, GNorm = 0.9847, lr_0 = 8.8208e-04
Loss = 1.8459e-01, PNorm = 111.3735, GNorm = 1.0724, lr_0 = 8.8147e-04
Loss = 2.0510e-01, PNorm = 111.4896, GNorm = 1.1888, lr_0 = 8.8087e-04
Loss = 1.8416e-01, PNorm = 111.6040, GNorm = 0.9408, lr_0 = 8.8026e-04
Loss = 2.2639e-01, PNorm = 111.7265, GNorm = 0.7389, lr_0 = 8.7966e-04
Loss = 2.3399e-01, PNorm = 111.8616, GNorm = 0.8837, lr_0 = 8.7906e-04
Loss = 1.9931e-01, PNorm = 111.9883, GNorm = 0.9025, lr_0 = 8.7846e-04
Loss = 2.0956e-01, PNorm = 112.1217, GNorm = 1.4820, lr_0 = 8.7785e-04
Loss = 2.3954e-01, PNorm = 112.2527, GNorm = 0.8313, lr_0 = 8.7725e-04
Loss = 1.6488e-01, PNorm = 112.3925, GNorm = 0.6042, lr_0 = 8.7665e-04
Loss = 2.0814e-01, PNorm = 112.5147, GNorm = 1.1355, lr_0 = 8.7605e-04
Loss = 2.2369e-01, PNorm = 112.6396, GNorm = 0.8637, lr_0 = 8.7545e-04
Loss = 1.9179e-01, PNorm = 112.7623, GNorm = 0.7161, lr_0 = 8.7485e-04
Loss = 2.1666e-01, PNorm = 112.8809, GNorm = 1.3972, lr_0 = 8.7425e-04
Loss = 2.2008e-01, PNorm = 113.0111, GNorm = 0.8482, lr_0 = 8.7365e-04
Loss = 2.0276e-01, PNorm = 113.1425, GNorm = 1.1400, lr_0 = 8.7306e-04
Loss = 1.9943e-01, PNorm = 113.2646, GNorm = 0.8465, lr_0 = 8.7246e-04
Loss = 2.0560e-01, PNorm = 113.3914, GNorm = 0.9174, lr_0 = 8.7186e-04
Loss = 2.1250e-01, PNorm = 113.5106, GNorm = 0.6306, lr_0 = 8.7126e-04
Loss = 1.8763e-01, PNorm = 113.6346, GNorm = 0.9147, lr_0 = 8.7067e-04
Loss = 2.5655e-01, PNorm = 113.7752, GNorm = 1.1614, lr_0 = 8.7007e-04
Loss = 2.4936e-01, PNorm = 113.9127, GNorm = 1.0083, lr_0 = 8.6947e-04
Loss = 2.2667e-01, PNorm = 114.0486, GNorm = 0.5922, lr_0 = 8.6888e-04
Loss = 2.0114e-01, PNorm = 114.1800, GNorm = 0.8673, lr_0 = 8.6828e-04
Loss = 1.8417e-01, PNorm = 114.3146, GNorm = 0.8511, lr_0 = 8.6769e-04
Loss = 2.0236e-01, PNorm = 114.4330, GNorm = 1.3513, lr_0 = 8.6709e-04
Loss = 1.9602e-01, PNorm = 114.5523, GNorm = 1.0328, lr_0 = 8.6650e-04
Loss = 2.0421e-01, PNorm = 114.6734, GNorm = 1.1896, lr_0 = 8.6590e-04
Loss = 2.0795e-01, PNorm = 114.8040, GNorm = 0.8894, lr_0 = 8.6531e-04
Loss = 2.1573e-01, PNorm = 114.9271, GNorm = 0.5670, lr_0 = 8.6472e-04
Loss = 2.0781e-01, PNorm = 115.0488, GNorm = 0.7892, lr_0 = 8.6413e-04
Loss = 2.1189e-01, PNorm = 115.1767, GNorm = 0.8438, lr_0 = 8.6353e-04
Loss = 2.2995e-01, PNorm = 115.3059, GNorm = 0.8067, lr_0 = 8.6294e-04
Loss = 2.0746e-01, PNorm = 115.4255, GNorm = 0.8745, lr_0 = 8.6235e-04
Loss = 2.4073e-01, PNorm = 115.5531, GNorm = 0.9812, lr_0 = 8.6176e-04
Loss = 2.2289e-01, PNorm = 115.6833, GNorm = 1.5423, lr_0 = 8.6117e-04
Loss = 2.1905e-01, PNorm = 115.7964, GNorm = 0.8075, lr_0 = 8.6058e-04
Loss = 1.9618e-01, PNorm = 115.9258, GNorm = 0.8475, lr_0 = 8.5999e-04
Loss = 2.1189e-01, PNorm = 116.0411, GNorm = 0.8443, lr_0 = 8.5940e-04
Loss = 2.1440e-01, PNorm = 116.1722, GNorm = 1.1337, lr_0 = 8.5881e-04
Loss = 2.3648e-01, PNorm = 116.2904, GNorm = 0.7899, lr_0 = 8.5823e-04
Loss = 2.3984e-01, PNorm = 116.4310, GNorm = 1.4900, lr_0 = 8.5764e-04
Loss = 2.4232e-01, PNorm = 116.5723, GNorm = 0.8354, lr_0 = 8.5705e-04
Loss = 1.9901e-01, PNorm = 116.7226, GNorm = 1.0584, lr_0 = 8.5646e-04
Loss = 2.1094e-01, PNorm = 116.8470, GNorm = 0.8613, lr_0 = 8.5588e-04
Loss = 2.3518e-01, PNorm = 116.9785, GNorm = 1.0039, lr_0 = 8.5529e-04
Loss = 2.2511e-01, PNorm = 117.1154, GNorm = 0.8722, lr_0 = 8.5470e-04
Loss = 2.7359e-01, PNorm = 117.2415, GNorm = 0.7285, lr_0 = 8.5412e-04
Loss = 2.1521e-01, PNorm = 117.3814, GNorm = 0.8388, lr_0 = 8.5353e-04
Loss = 2.0964e-01, PNorm = 117.5187, GNorm = 0.9049, lr_0 = 8.5295e-04
Loss = 2.1018e-01, PNorm = 117.6463, GNorm = 0.9613, lr_0 = 8.5236e-04
Loss = 2.2379e-01, PNorm = 117.7726, GNorm = 0.8425, lr_0 = 8.5178e-04
Loss = 2.0547e-01, PNorm = 117.9067, GNorm = 0.9407, lr_0 = 8.5120e-04
Loss = 2.1930e-01, PNorm = 118.0378, GNorm = 1.6529, lr_0 = 8.5061e-04
Loss = 2.4779e-01, PNorm = 118.1633, GNorm = 0.9543, lr_0 = 8.5003e-04
Loss = 2.5894e-01, PNorm = 118.2984, GNorm = 0.7697, lr_0 = 8.4945e-04
Loss = 2.0885e-01, PNorm = 118.4347, GNorm = 1.0432, lr_0 = 8.4887e-04
Loss = 2.2222e-01, PNorm = 118.5745, GNorm = 0.8766, lr_0 = 8.4828e-04
Validation mae = 0.125106
Epoch 4
Loss = 1.1698e-01, PNorm = 118.6897, GNorm = 0.9351, lr_0 = 8.4770e-04
Loss = 1.2766e-01, PNorm = 118.7918, GNorm = 0.7806, lr_0 = 8.4712e-04
Loss = 1.0638e-01, PNorm = 118.8721, GNorm = 0.7173, lr_0 = 8.4654e-04
Loss = 1.4077e-01, PNorm = 118.9523, GNorm = 0.6764, lr_0 = 8.4596e-04
Loss = 1.3392e-01, PNorm = 119.0340, GNorm = 0.5420, lr_0 = 8.4538e-04
Loss = 1.0655e-01, PNorm = 119.1166, GNorm = 0.5633, lr_0 = 8.4480e-04
Loss = 1.3655e-01, PNorm = 119.1936, GNorm = 0.6790, lr_0 = 8.4423e-04
Loss = 1.1264e-01, PNorm = 119.2639, GNorm = 0.7458, lr_0 = 8.4365e-04
Loss = 1.1805e-01, PNorm = 119.3364, GNorm = 0.7786, lr_0 = 8.4307e-04
Loss = 1.2783e-01, PNorm = 119.4110, GNorm = 0.7223, lr_0 = 8.4249e-04
Loss = 1.4847e-01, PNorm = 119.4910, GNorm = 0.8116, lr_0 = 8.4191e-04
Loss = 1.3117e-01, PNorm = 119.5749, GNorm = 0.7346, lr_0 = 8.4134e-04
Loss = 1.2664e-01, PNorm = 119.6599, GNorm = 0.6518, lr_0 = 8.4076e-04
Loss = 1.1851e-01, PNorm = 119.7459, GNorm = 0.8809, lr_0 = 8.4019e-04
Loss = 1.1461e-01, PNorm = 119.8257, GNorm = 0.6144, lr_0 = 8.3961e-04
Loss = 1.2238e-01, PNorm = 119.9023, GNorm = 0.7084, lr_0 = 8.3903e-04
Loss = 1.1533e-01, PNorm = 119.9820, GNorm = 1.3539, lr_0 = 8.3846e-04
Loss = 1.2889e-01, PNorm = 120.0599, GNorm = 0.5976, lr_0 = 8.3789e-04
Loss = 1.4006e-01, PNorm = 120.1326, GNorm = 0.6517, lr_0 = 8.3731e-04
Loss = 1.2015e-01, PNorm = 120.2233, GNorm = 1.2895, lr_0 = 8.3674e-04
Loss = 1.3355e-01, PNorm = 120.2999, GNorm = 1.0680, lr_0 = 8.3616e-04
Loss = 1.1961e-01, PNorm = 120.3975, GNorm = 1.0628, lr_0 = 8.3559e-04
Loss = 1.0617e-01, PNorm = 120.4809, GNorm = 0.5212, lr_0 = 8.3502e-04
Loss = 1.2672e-01, PNorm = 120.5627, GNorm = 1.0164, lr_0 = 8.3445e-04
Loss = 9.2342e-02, PNorm = 120.6402, GNorm = 0.4964, lr_0 = 8.3388e-04
Loss = 1.1015e-01, PNorm = 120.7062, GNorm = 0.6998, lr_0 = 8.3330e-04
Loss = 1.0998e-01, PNorm = 120.7876, GNorm = 0.6427, lr_0 = 8.3273e-04
Loss = 1.1837e-01, PNorm = 120.8674, GNorm = 0.6516, lr_0 = 8.3216e-04
Loss = 1.2477e-01, PNorm = 120.9480, GNorm = 1.0478, lr_0 = 8.3159e-04
Loss = 1.2344e-01, PNorm = 121.0327, GNorm = 0.9355, lr_0 = 8.3102e-04
Loss = 1.1048e-01, PNorm = 121.1216, GNorm = 0.6945, lr_0 = 8.3045e-04
Loss = 1.4335e-01, PNorm = 121.2047, GNorm = 0.7257, lr_0 = 8.2988e-04
Loss = 1.1272e-01, PNorm = 121.3024, GNorm = 0.8752, lr_0 = 8.2932e-04
Loss = 1.1312e-01, PNorm = 121.3910, GNorm = 0.6336, lr_0 = 8.2875e-04
Loss = 1.1364e-01, PNorm = 121.4766, GNorm = 0.6076, lr_0 = 8.2818e-04
Loss = 1.2443e-01, PNorm = 121.5660, GNorm = 0.6816, lr_0 = 8.2761e-04
Loss = 1.1205e-01, PNorm = 121.6489, GNorm = 0.4809, lr_0 = 8.2705e-04
Loss = 1.1231e-01, PNorm = 121.7336, GNorm = 0.5850, lr_0 = 8.2648e-04
Loss = 9.9025e-02, PNorm = 121.8128, GNorm = 0.7276, lr_0 = 8.2591e-04
Loss = 1.1542e-01, PNorm = 121.8955, GNorm = 0.8790, lr_0 = 8.2535e-04
Loss = 1.1782e-01, PNorm = 121.9714, GNorm = 0.6435, lr_0 = 8.2478e-04
Loss = 1.0164e-01, PNorm = 122.0541, GNorm = 1.0584, lr_0 = 8.2422e-04
Loss = 1.1915e-01, PNorm = 122.1242, GNorm = 0.6446, lr_0 = 8.2365e-04
Loss = 1.4070e-01, PNorm = 122.2067, GNorm = 1.3694, lr_0 = 8.2309e-04
Loss = 1.2019e-01, PNorm = 122.2828, GNorm = 0.6661, lr_0 = 8.2252e-04
Loss = 1.2531e-01, PNorm = 122.3666, GNorm = 0.7512, lr_0 = 8.2196e-04
Loss = 1.1297e-01, PNorm = 122.4571, GNorm = 0.8578, lr_0 = 8.2140e-04
Loss = 1.3566e-01, PNorm = 122.5467, GNorm = 0.7163, lr_0 = 8.2084e-04
Loss = 1.3029e-01, PNorm = 122.6227, GNorm = 1.1518, lr_0 = 8.2027e-04
Loss = 1.2591e-01, PNorm = 122.7093, GNorm = 0.8736, lr_0 = 8.1971e-04
Loss = 1.2409e-01, PNorm = 122.8021, GNorm = 0.5798, lr_0 = 8.1915e-04
Loss = 1.4119e-01, PNorm = 122.9028, GNorm = 0.5114, lr_0 = 8.1859e-04
Loss = 1.3970e-01, PNorm = 122.9903, GNorm = 0.6113, lr_0 = 8.1803e-04
Loss = 1.2536e-01, PNorm = 123.0798, GNorm = 0.9758, lr_0 = 8.1747e-04
Loss = 1.4208e-01, PNorm = 123.1666, GNorm = 1.1340, lr_0 = 8.1691e-04
Loss = 1.3083e-01, PNorm = 123.2616, GNorm = 0.9584, lr_0 = 8.1635e-04
Loss = 1.2381e-01, PNorm = 123.3621, GNorm = 0.8971, lr_0 = 8.1579e-04
Loss = 1.1882e-01, PNorm = 123.4566, GNorm = 0.3620, lr_0 = 8.1523e-04
Loss = 1.4870e-01, PNorm = 123.5609, GNorm = 0.9297, lr_0 = 8.1467e-04
Loss = 1.3107e-01, PNorm = 123.6686, GNorm = 0.5457, lr_0 = 8.1411e-04
Loss = 1.1303e-01, PNorm = 123.7729, GNorm = 0.7484, lr_0 = 8.1355e-04
Loss = 1.1818e-01, PNorm = 123.8693, GNorm = 0.7100, lr_0 = 8.1300e-04
Loss = 1.2074e-01, PNorm = 123.9663, GNorm = 0.9981, lr_0 = 8.1244e-04
Loss = 1.2145e-01, PNorm = 124.0554, GNorm = 0.4723, lr_0 = 8.1188e-04
Loss = 1.1766e-01, PNorm = 124.1510, GNorm = 0.5355, lr_0 = 8.1133e-04
Loss = 1.4456e-01, PNorm = 124.2503, GNorm = 0.5881, lr_0 = 8.1077e-04
Loss = 1.2054e-01, PNorm = 124.3445, GNorm = 1.1124, lr_0 = 8.1022e-04
Loss = 1.2854e-01, PNorm = 124.4406, GNorm = 1.0033, lr_0 = 8.0966e-04
Loss = 1.0908e-01, PNorm = 124.5227, GNorm = 0.6453, lr_0 = 8.0911e-04
Loss = 1.9882e-01, PNorm = 124.6175, GNorm = 0.7048, lr_0 = 8.0855e-04
Loss = 1.4422e-01, PNorm = 124.7239, GNorm = 0.8769, lr_0 = 8.0800e-04
Loss = 1.3693e-01, PNorm = 124.8343, GNorm = 0.7614, lr_0 = 8.0745e-04
Loss = 1.3104e-01, PNorm = 124.9484, GNorm = 0.7719, lr_0 = 8.0689e-04
Loss = 1.3504e-01, PNorm = 125.0571, GNorm = 0.5447, lr_0 = 8.0634e-04
Loss = 1.5435e-01, PNorm = 125.1582, GNorm = 0.6046, lr_0 = 8.0579e-04
Loss = 1.3066e-01, PNorm = 125.2606, GNorm = 0.7159, lr_0 = 8.0523e-04
Loss = 1.1573e-01, PNorm = 125.3662, GNorm = 0.8468, lr_0 = 8.0468e-04
Loss = 1.5074e-01, PNorm = 125.4618, GNorm = 0.7408, lr_0 = 8.0413e-04
Loss = 1.5397e-01, PNorm = 125.5563, GNorm = 0.5939, lr_0 = 8.0358e-04
Loss = 1.3304e-01, PNorm = 125.6653, GNorm = 0.7528, lr_0 = 8.0303e-04
Loss = 1.4168e-01, PNorm = 125.7796, GNorm = 0.6164, lr_0 = 8.0248e-04
Loss = 1.2731e-01, PNorm = 125.8935, GNorm = 0.6463, lr_0 = 8.0193e-04
Loss = 1.3443e-01, PNorm = 126.0051, GNorm = 0.6341, lr_0 = 8.0138e-04
Loss = 1.3502e-01, PNorm = 126.1125, GNorm = 0.7817, lr_0 = 8.0083e-04
Loss = 1.5346e-01, PNorm = 126.2238, GNorm = 0.9845, lr_0 = 8.0028e-04
Loss = 1.3448e-01, PNorm = 126.3188, GNorm = 0.8902, lr_0 = 7.9974e-04
Loss = 1.4741e-01, PNorm = 126.4251, GNorm = 1.5550, lr_0 = 7.9919e-04
Loss = 1.3097e-01, PNorm = 126.5293, GNorm = 0.8691, lr_0 = 7.9864e-04
Loss = 1.3482e-01, PNorm = 126.6332, GNorm = 1.4326, lr_0 = 7.9809e-04
Loss = 1.7190e-01, PNorm = 126.7402, GNorm = 0.5998, lr_0 = 7.9755e-04
Loss = 1.6399e-01, PNorm = 126.8522, GNorm = 0.7389, lr_0 = 7.9700e-04
Loss = 1.2558e-01, PNorm = 126.9601, GNorm = 0.7875, lr_0 = 7.9645e-04
Loss = 1.8531e-01, PNorm = 127.0640, GNorm = 0.9071, lr_0 = 7.9591e-04
Loss = 1.3253e-01, PNorm = 127.1707, GNorm = 1.0009, lr_0 = 7.9536e-04
Loss = 1.4876e-01, PNorm = 127.2692, GNorm = 0.6030, lr_0 = 7.9482e-04
Loss = 1.2592e-01, PNorm = 127.3809, GNorm = 0.8101, lr_0 = 7.9427e-04
Loss = 1.1818e-01, PNorm = 127.4824, GNorm = 1.1855, lr_0 = 7.9373e-04
Loss = 1.4158e-01, PNorm = 127.5814, GNorm = 0.8163, lr_0 = 7.9319e-04
Loss = 1.2952e-01, PNorm = 127.6837, GNorm = 0.5799, lr_0 = 7.9264e-04
Loss = 1.3638e-01, PNorm = 127.7819, GNorm = 0.7882, lr_0 = 7.9210e-04
Loss = 1.7964e-01, PNorm = 127.8939, GNorm = 1.0664, lr_0 = 7.9156e-04
Loss = 1.4555e-01, PNorm = 128.0115, GNorm = 0.8304, lr_0 = 7.9101e-04
Loss = 1.4554e-01, PNorm = 128.1259, GNorm = 0.5883, lr_0 = 7.9047e-04
Loss = 1.5825e-01, PNorm = 128.2380, GNorm = 0.5598, lr_0 = 7.8993e-04
Loss = 1.4803e-01, PNorm = 128.3525, GNorm = 1.1257, lr_0 = 7.8939e-04
Loss = 1.3929e-01, PNorm = 128.4576, GNorm = 0.9405, lr_0 = 7.8885e-04
Loss = 1.3520e-01, PNorm = 128.5582, GNorm = 1.0795, lr_0 = 7.8831e-04
Loss = 1.6962e-01, PNorm = 128.6582, GNorm = 1.2758, lr_0 = 7.8777e-04
Loss = 1.4262e-01, PNorm = 128.7695, GNorm = 0.7241, lr_0 = 7.8723e-04
Loss = 1.4875e-01, PNorm = 128.8767, GNorm = 0.8362, lr_0 = 7.8669e-04
Loss = 1.3476e-01, PNorm = 128.9889, GNorm = 0.6583, lr_0 = 7.8615e-04
Loss = 1.6535e-01, PNorm = 129.0944, GNorm = 2.0935, lr_0 = 7.8561e-04
Loss = 1.2997e-01, PNorm = 129.1925, GNorm = 0.5055, lr_0 = 7.8507e-04
Loss = 1.5401e-01, PNorm = 129.3059, GNorm = 0.8645, lr_0 = 7.8454e-04
Loss = 1.6218e-01, PNorm = 129.4072, GNorm = 0.7154, lr_0 = 7.8400e-04
Loss = 1.6610e-01, PNorm = 129.5190, GNorm = 0.6172, lr_0 = 7.8346e-04
Loss = 1.4095e-01, PNorm = 129.6220, GNorm = 0.5034, lr_0 = 7.8293e-04
Loss = 1.7441e-01, PNorm = 129.7323, GNorm = 1.0339, lr_0 = 7.8239e-04
Loss = 1.3263e-01, PNorm = 129.8389, GNorm = 0.5282, lr_0 = 7.8185e-04
Loss = 1.5886e-01, PNorm = 129.9440, GNorm = 0.7555, lr_0 = 7.8132e-04
Validation mae = 0.126953
Epoch 5
Loss = 8.3014e-02, PNorm = 130.0400, GNorm = 0.5162, lr_0 = 7.8078e-04
Loss = 9.5989e-02, PNorm = 130.1219, GNorm = 0.6610, lr_0 = 7.8025e-04
Loss = 1.0235e-01, PNorm = 130.1952, GNorm = 0.6661, lr_0 = 7.7971e-04
Loss = 9.1709e-02, PNorm = 130.2658, GNorm = 0.7040, lr_0 = 7.7918e-04
Loss = 8.2248e-02, PNorm = 130.3256, GNorm = 0.5790, lr_0 = 7.7864e-04
Loss = 7.8295e-02, PNorm = 130.3840, GNorm = 0.6461, lr_0 = 7.7811e-04
Loss = 8.9142e-02, PNorm = 130.4441, GNorm = 0.6535, lr_0 = 7.7758e-04
Loss = 7.5200e-02, PNorm = 130.5097, GNorm = 1.4473, lr_0 = 7.7705e-04
Loss = 7.8404e-02, PNorm = 130.5668, GNorm = 0.8343, lr_0 = 7.7651e-04
Loss = 8.1205e-02, PNorm = 130.6315, GNorm = 0.5552, lr_0 = 7.7598e-04
Loss = 8.0454e-02, PNorm = 130.6924, GNorm = 0.6486, lr_0 = 7.7545e-04
Loss = 7.1267e-02, PNorm = 130.7445, GNorm = 0.4886, lr_0 = 7.7492e-04
Loss = 6.9525e-02, PNorm = 130.7961, GNorm = 0.5789, lr_0 = 7.7439e-04
Loss = 9.5817e-02, PNorm = 130.8528, GNorm = 0.6473, lr_0 = 7.7386e-04
Loss = 8.1647e-02, PNorm = 130.9142, GNorm = 0.8413, lr_0 = 7.7333e-04
Loss = 8.1985e-02, PNorm = 130.9780, GNorm = 0.5071, lr_0 = 7.7280e-04
Loss = 7.7003e-02, PNorm = 131.0385, GNorm = 0.8490, lr_0 = 7.7227e-04
Loss = 7.9984e-02, PNorm = 131.0969, GNorm = 0.7465, lr_0 = 7.7174e-04
Loss = 8.5947e-02, PNorm = 131.1627, GNorm = 0.5987, lr_0 = 7.7121e-04
Loss = 8.8149e-02, PNorm = 131.2222, GNorm = 0.4681, lr_0 = 7.7068e-04
Loss = 9.2793e-02, PNorm = 131.2936, GNorm = 0.6360, lr_0 = 7.7015e-04
Loss = 8.7099e-02, PNorm = 131.3491, GNorm = 0.7072, lr_0 = 7.6963e-04
Loss = 9.3847e-02, PNorm = 131.4060, GNorm = 1.3536, lr_0 = 7.6910e-04
Loss = 9.1136e-02, PNorm = 131.4740, GNorm = 0.4004, lr_0 = 7.6857e-04
Loss = 9.0317e-02, PNorm = 131.5446, GNorm = 0.5943, lr_0 = 7.6805e-04
Loss = 8.4520e-02, PNorm = 131.6214, GNorm = 0.7163, lr_0 = 7.6752e-04
Loss = 9.3653e-02, PNorm = 131.6882, GNorm = 0.4575, lr_0 = 7.6699e-04
Loss = 8.6079e-02, PNorm = 131.7549, GNorm = 0.4193, lr_0 = 7.6647e-04
Loss = 6.4176e-02, PNorm = 131.8232, GNorm = 0.6087, lr_0 = 7.6594e-04
Loss = 8.3043e-02, PNorm = 131.8845, GNorm = 0.7507, lr_0 = 7.6542e-04
Loss = 8.6036e-02, PNorm = 131.9448, GNorm = 0.6348, lr_0 = 7.6489e-04
Loss = 9.0917e-02, PNorm = 132.0114, GNorm = 1.0675, lr_0 = 7.6437e-04
Loss = 8.3061e-02, PNorm = 132.0875, GNorm = 0.5403, lr_0 = 7.6385e-04
Loss = 8.8660e-02, PNorm = 132.1601, GNorm = 0.5376, lr_0 = 7.6332e-04
Loss = 7.8054e-02, PNorm = 132.2362, GNorm = 0.5005, lr_0 = 7.6280e-04
Loss = 8.3678e-02, PNorm = 132.3002, GNorm = 0.5790, lr_0 = 7.6228e-04
Loss = 8.9691e-02, PNorm = 132.3668, GNorm = 0.7101, lr_0 = 7.6176e-04
Loss = 9.2721e-02, PNorm = 132.4329, GNorm = 0.7618, lr_0 = 7.6123e-04
Loss = 8.5515e-02, PNorm = 132.5094, GNorm = 0.9063, lr_0 = 7.6071e-04
Loss = 7.3880e-02, PNorm = 132.5706, GNorm = 0.6280, lr_0 = 7.6019e-04
Loss = 7.6124e-02, PNorm = 132.6401, GNorm = 0.9444, lr_0 = 7.5967e-04
Loss = 7.5008e-02, PNorm = 132.6971, GNorm = 0.5786, lr_0 = 7.5915e-04
Loss = 7.4126e-02, PNorm = 132.7694, GNorm = 0.6718, lr_0 = 7.5863e-04
Loss = 1.0807e-01, PNorm = 132.8275, GNorm = 0.9984, lr_0 = 7.5811e-04
Loss = 7.5856e-02, PNorm = 132.8948, GNorm = 0.4763, lr_0 = 7.5759e-04
Loss = 7.8890e-02, PNorm = 132.9623, GNorm = 0.6545, lr_0 = 7.5707e-04
Loss = 1.0399e-01, PNorm = 133.0318, GNorm = 1.0028, lr_0 = 7.5655e-04
Loss = 7.7003e-02, PNorm = 133.0973, GNorm = 0.6160, lr_0 = 7.5603e-04
Loss = 9.9841e-02, PNorm = 133.1785, GNorm = 0.9002, lr_0 = 7.5552e-04
Loss = 7.8783e-02, PNorm = 133.2463, GNorm = 0.6909, lr_0 = 7.5500e-04
Loss = 8.7155e-02, PNorm = 133.3143, GNorm = 0.9722, lr_0 = 7.5448e-04
Loss = 8.8633e-02, PNorm = 133.3892, GNorm = 1.0783, lr_0 = 7.5397e-04
Loss = 9.0210e-02, PNorm = 133.4532, GNorm = 0.7827, lr_0 = 7.5345e-04
Loss = 9.1254e-02, PNorm = 133.5227, GNorm = 1.7448, lr_0 = 7.5293e-04
Loss = 8.6071e-02, PNorm = 133.6006, GNorm = 0.5465, lr_0 = 7.5242e-04
Loss = 8.9018e-02, PNorm = 133.6733, GNorm = 0.5946, lr_0 = 7.5190e-04
Loss = 9.1993e-02, PNorm = 133.7451, GNorm = 0.6771, lr_0 = 7.5139e-04
Loss = 1.0740e-01, PNorm = 133.8299, GNorm = 0.5428, lr_0 = 7.5087e-04
Loss = 7.1765e-02, PNorm = 133.9072, GNorm = 0.5463, lr_0 = 7.5036e-04
Loss = 8.7769e-02, PNorm = 133.9685, GNorm = 0.6209, lr_0 = 7.4984e-04
Loss = 8.2565e-02, PNorm = 134.0391, GNorm = 0.7819, lr_0 = 7.4933e-04
Loss = 9.0397e-02, PNorm = 134.1167, GNorm = 0.5780, lr_0 = 7.4882e-04
Loss = 8.3200e-02, PNorm = 134.1814, GNorm = 0.6714, lr_0 = 7.4830e-04
Loss = 8.3541e-02, PNorm = 134.2540, GNorm = 0.4798, lr_0 = 7.4779e-04
Loss = 8.7953e-02, PNorm = 134.3308, GNorm = 0.6793, lr_0 = 7.4728e-04
Loss = 8.3571e-02, PNorm = 134.4111, GNorm = 0.5680, lr_0 = 7.4677e-04
Loss = 7.8564e-02, PNorm = 134.4901, GNorm = 0.6246, lr_0 = 7.4625e-04
Loss = 9.6861e-02, PNorm = 134.5648, GNorm = 0.9832, lr_0 = 7.4574e-04
Loss = 9.2754e-02, PNorm = 134.6476, GNorm = 0.5443, lr_0 = 7.4523e-04
Loss = 1.0124e-01, PNorm = 134.7277, GNorm = 0.5782, lr_0 = 7.4472e-04
Loss = 1.0471e-01, PNorm = 134.8062, GNorm = 0.4966, lr_0 = 7.4421e-04
Loss = 8.5032e-02, PNorm = 134.8732, GNorm = 0.6543, lr_0 = 7.4370e-04
Loss = 9.3410e-02, PNorm = 134.9508, GNorm = 0.5107, lr_0 = 7.4319e-04
Loss = 9.6375e-02, PNorm = 135.0287, GNorm = 0.7366, lr_0 = 7.4268e-04
Loss = 1.0250e-01, PNorm = 135.1222, GNorm = 1.2242, lr_0 = 7.4217e-04
Loss = 8.0532e-02, PNorm = 135.2055, GNorm = 0.5262, lr_0 = 7.4167e-04
Loss = 9.2405e-02, PNorm = 135.2930, GNorm = 0.8470, lr_0 = 7.4116e-04
Loss = 9.5008e-02, PNorm = 135.3680, GNorm = 0.4355, lr_0 = 7.4065e-04
Loss = 9.3310e-02, PNorm = 135.4455, GNorm = 0.5163, lr_0 = 7.4014e-04
Loss = 1.0201e-01, PNorm = 135.5240, GNorm = 1.0309, lr_0 = 7.3964e-04
Loss = 1.1368e-01, PNorm = 135.6108, GNorm = 0.5019, lr_0 = 7.3913e-04
Loss = 9.9323e-02, PNorm = 135.7045, GNorm = 0.7831, lr_0 = 7.3862e-04
Loss = 8.9848e-02, PNorm = 135.8002, GNorm = 0.6299, lr_0 = 7.3812e-04
Loss = 9.7851e-02, PNorm = 135.8923, GNorm = 0.3912, lr_0 = 7.3761e-04
Loss = 8.6183e-02, PNorm = 135.9762, GNorm = 0.7956, lr_0 = 7.3711e-04
Loss = 8.3381e-02, PNorm = 136.0561, GNorm = 0.5705, lr_0 = 7.3660e-04
Loss = 1.0715e-01, PNorm = 136.1393, GNorm = 0.7422, lr_0 = 7.3610e-04
Loss = 1.1002e-01, PNorm = 136.2301, GNorm = 1.1152, lr_0 = 7.3559e-04
Loss = 8.5177e-02, PNorm = 136.3145, GNorm = 0.5173, lr_0 = 7.3509e-04
Loss = 1.1358e-01, PNorm = 136.4084, GNorm = 0.6325, lr_0 = 7.3458e-04
Loss = 1.0613e-01, PNorm = 136.4965, GNorm = 1.1065, lr_0 = 7.3408e-04
Loss = 8.7025e-02, PNorm = 136.5939, GNorm = 0.6139, lr_0 = 7.3358e-04
Loss = 8.3815e-02, PNorm = 136.6787, GNorm = 0.5497, lr_0 = 7.3308e-04
Loss = 7.8515e-02, PNorm = 136.7630, GNorm = 0.4189, lr_0 = 7.3257e-04
Loss = 8.7685e-02, PNorm = 136.8430, GNorm = 0.7119, lr_0 = 7.3207e-04
Loss = 1.0035e-01, PNorm = 136.9264, GNorm = 0.4909, lr_0 = 7.3157e-04
Loss = 9.0011e-02, PNorm = 137.0105, GNorm = 0.6177, lr_0 = 7.3107e-04
Loss = 1.0334e-01, PNorm = 137.0947, GNorm = 0.8104, lr_0 = 7.3057e-04
Loss = 8.8392e-02, PNorm = 137.1760, GNorm = 0.5327, lr_0 = 7.3007e-04
Loss = 1.0151e-01, PNorm = 137.2617, GNorm = 0.5306, lr_0 = 7.2957e-04
Loss = 9.0878e-02, PNorm = 137.3495, GNorm = 0.6269, lr_0 = 7.2907e-04
Loss = 1.0816e-01, PNorm = 137.4305, GNorm = 0.8657, lr_0 = 7.2857e-04
Loss = 9.8632e-02, PNorm = 137.5265, GNorm = 1.7141, lr_0 = 7.2807e-04
Loss = 9.0408e-02, PNorm = 137.6234, GNorm = 0.6595, lr_0 = 7.2757e-04
Loss = 8.2579e-02, PNorm = 137.7161, GNorm = 0.5368, lr_0 = 7.2707e-04
Loss = 1.1078e-01, PNorm = 137.8037, GNorm = 1.0172, lr_0 = 7.2657e-04
Loss = 7.9979e-02, PNorm = 137.8869, GNorm = 0.5024, lr_0 = 7.2608e-04
Loss = 7.6496e-02, PNorm = 137.9684, GNorm = 0.7923, lr_0 = 7.2558e-04
Loss = 1.0498e-01, PNorm = 138.0440, GNorm = 0.6521, lr_0 = 7.2508e-04
Loss = 9.4691e-02, PNorm = 138.1233, GNorm = 0.5104, lr_0 = 7.2458e-04
Loss = 1.1939e-01, PNorm = 138.2155, GNorm = 0.5823, lr_0 = 7.2409e-04
Loss = 1.0490e-01, PNorm = 138.3039, GNorm = 0.6389, lr_0 = 7.2359e-04
Loss = 9.6695e-02, PNorm = 138.3919, GNorm = 0.9610, lr_0 = 7.2310e-04
Loss = 9.3288e-02, PNorm = 138.4755, GNorm = 0.7738, lr_0 = 7.2260e-04
Loss = 9.9810e-02, PNorm = 138.5681, GNorm = 0.7648, lr_0 = 7.2211e-04
Loss = 1.1448e-01, PNorm = 138.6647, GNorm = 0.4130, lr_0 = 7.2161e-04
Loss = 9.4065e-02, PNorm = 138.7575, GNorm = 0.6060, lr_0 = 7.2112e-04
Loss = 1.1545e-01, PNorm = 138.8474, GNorm = 1.0735, lr_0 = 7.2062e-04
Loss = 1.1421e-01, PNorm = 138.9418, GNorm = 0.9410, lr_0 = 7.2013e-04
Loss = 1.1949e-01, PNorm = 139.0376, GNorm = 1.0428, lr_0 = 7.1964e-04
Validation mae = 0.123450
Epoch 6
Loss = 8.7100e-02, PNorm = 139.1231, GNorm = 1.0548, lr_0 = 7.1914e-04
Loss = 8.0598e-02, PNorm = 139.1961, GNorm = 1.5674, lr_0 = 7.1865e-04
Loss = 6.4958e-02, PNorm = 139.2476, GNorm = 0.6127, lr_0 = 7.1816e-04
Loss = 7.4915e-02, PNorm = 139.3017, GNorm = 0.4736, lr_0 = 7.1767e-04
Loss = 5.9181e-02, PNorm = 139.3601, GNorm = 0.4687, lr_0 = 7.1717e-04
Loss = 6.1222e-02, PNorm = 139.4081, GNorm = 0.4394, lr_0 = 7.1668e-04
Loss = 5.5406e-02, PNorm = 139.4613, GNorm = 0.7670, lr_0 = 7.1619e-04
Loss = 5.6099e-02, PNorm = 139.5073, GNorm = 0.3708, lr_0 = 7.1570e-04
Loss = 6.1971e-02, PNorm = 139.5598, GNorm = 0.6099, lr_0 = 7.1521e-04
Loss = 5.5783e-02, PNorm = 139.6039, GNorm = 0.3046, lr_0 = 7.1472e-04
Loss = 8.1247e-02, PNorm = 139.6607, GNorm = 0.5605, lr_0 = 7.1423e-04
Loss = 5.8484e-02, PNorm = 139.7160, GNorm = 0.5529, lr_0 = 7.1374e-04
Loss = 5.4986e-02, PNorm = 139.7663, GNorm = 0.4840, lr_0 = 7.1325e-04
Loss = 6.2230e-02, PNorm = 139.8204, GNorm = 0.3556, lr_0 = 7.1277e-04
Loss = 6.7873e-02, PNorm = 139.8739, GNorm = 0.4672, lr_0 = 7.1228e-04
Loss = 6.1335e-02, PNorm = 139.9252, GNorm = 0.5368, lr_0 = 7.1179e-04
Loss = 7.6625e-02, PNorm = 139.9887, GNorm = 0.7682, lr_0 = 7.1130e-04
Loss = 6.7372e-02, PNorm = 140.0412, GNorm = 0.5541, lr_0 = 7.1081e-04
Loss = 6.8448e-02, PNorm = 140.0877, GNorm = 0.4807, lr_0 = 7.1033e-04
Loss = 6.0234e-02, PNorm = 140.1444, GNorm = 0.5931, lr_0 = 7.0984e-04
Loss = 5.9273e-02, PNorm = 140.1975, GNorm = 0.4852, lr_0 = 7.0935e-04
Loss = 7.4682e-02, PNorm = 140.2515, GNorm = 0.4764, lr_0 = 7.0887e-04
Loss = 6.3248e-02, PNorm = 140.3113, GNorm = 0.9362, lr_0 = 7.0838e-04
Loss = 5.7739e-02, PNorm = 140.3725, GNorm = 0.4894, lr_0 = 7.0790e-04
Loss = 6.1207e-02, PNorm = 140.4215, GNorm = 0.6623, lr_0 = 7.0741e-04
Loss = 5.4105e-02, PNorm = 140.4787, GNorm = 0.5486, lr_0 = 7.0693e-04
Loss = 6.4988e-02, PNorm = 140.5280, GNorm = 0.7189, lr_0 = 7.0644e-04
Loss = 4.8953e-02, PNorm = 140.5792, GNorm = 0.8023, lr_0 = 7.0596e-04
Loss = 6.4486e-02, PNorm = 140.6307, GNorm = 0.6319, lr_0 = 7.0548e-04
Loss = 5.9875e-02, PNorm = 140.6868, GNorm = 0.6524, lr_0 = 7.0499e-04
Loss = 5.9266e-02, PNorm = 140.7417, GNorm = 0.7029, lr_0 = 7.0451e-04
Loss = 5.9589e-02, PNorm = 140.7919, GNorm = 0.6178, lr_0 = 7.0403e-04
Loss = 5.0593e-02, PNorm = 140.8431, GNorm = 0.4483, lr_0 = 7.0354e-04
Loss = 6.0702e-02, PNorm = 140.8876, GNorm = 0.6362, lr_0 = 7.0306e-04
Loss = 5.5556e-02, PNorm = 140.9423, GNorm = 0.3676, lr_0 = 7.0258e-04
Loss = 5.0859e-02, PNorm = 140.9936, GNorm = 0.5217, lr_0 = 7.0210e-04
Loss = 5.3713e-02, PNorm = 141.0320, GNorm = 0.6359, lr_0 = 7.0162e-04
Loss = 6.4731e-02, PNorm = 141.0807, GNorm = 0.6824, lr_0 = 7.0114e-04
Loss = 5.6549e-02, PNorm = 141.1403, GNorm = 0.8326, lr_0 = 7.0066e-04
Loss = 6.9062e-02, PNorm = 141.2012, GNorm = 0.9462, lr_0 = 7.0018e-04
Loss = 5.9340e-02, PNorm = 141.2555, GNorm = 0.5659, lr_0 = 6.9970e-04
Loss = 7.6470e-02, PNorm = 141.3183, GNorm = 0.7349, lr_0 = 6.9922e-04
Loss = 6.5121e-02, PNorm = 141.3751, GNorm = 0.4512, lr_0 = 6.9874e-04
Loss = 6.8708e-02, PNorm = 141.4338, GNorm = 0.5717, lr_0 = 6.9826e-04
Loss = 5.7746e-02, PNorm = 141.5000, GNorm = 1.1348, lr_0 = 6.9778e-04
Loss = 6.7356e-02, PNorm = 141.5572, GNorm = 0.5329, lr_0 = 6.9730e-04
Loss = 6.1144e-02, PNorm = 141.6225, GNorm = 0.5328, lr_0 = 6.9683e-04
Loss = 6.3881e-02, PNorm = 141.6835, GNorm = 0.6919, lr_0 = 6.9635e-04
Loss = 6.5931e-02, PNorm = 141.7395, GNorm = 0.5740, lr_0 = 6.9587e-04
Loss = 6.1082e-02, PNorm = 141.8000, GNorm = 0.3943, lr_0 = 6.9540e-04
Loss = 5.4197e-02, PNorm = 141.8584, GNorm = 0.5041, lr_0 = 6.9492e-04
Loss = 6.6294e-02, PNorm = 141.9148, GNorm = 0.6062, lr_0 = 6.9444e-04
Loss = 4.6031e-02, PNorm = 141.9656, GNorm = 0.4901, lr_0 = 6.9397e-04
Loss = 5.8576e-02, PNorm = 142.0202, GNorm = 0.4061, lr_0 = 6.9349e-04
Loss = 6.5821e-02, PNorm = 142.0801, GNorm = 0.7721, lr_0 = 6.9302e-04
Loss = 4.9455e-02, PNorm = 142.1376, GNorm = 0.4791, lr_0 = 6.9254e-04
Loss = 5.9773e-02, PNorm = 142.1980, GNorm = 0.6221, lr_0 = 6.9207e-04
Loss = 8.0479e-02, PNorm = 142.2598, GNorm = 0.6349, lr_0 = 6.9159e-04
Loss = 1.1195e-01, PNorm = 142.3353, GNorm = 0.3541, lr_0 = 6.9112e-04
Loss = 7.3373e-02, PNorm = 142.4192, GNorm = 0.6384, lr_0 = 6.9065e-04
Loss = 7.4073e-02, PNorm = 142.4887, GNorm = 0.8286, lr_0 = 6.9017e-04
Loss = 5.6408e-02, PNorm = 142.5557, GNorm = 0.7098, lr_0 = 6.8970e-04
Loss = 7.0262e-02, PNorm = 142.6205, GNorm = 0.7683, lr_0 = 6.8923e-04
Loss = 7.1856e-02, PNorm = 142.6964, GNorm = 0.5717, lr_0 = 6.8876e-04
Loss = 7.3602e-02, PNorm = 142.7649, GNorm = 0.4755, lr_0 = 6.8828e-04
Loss = 5.9958e-02, PNorm = 142.8391, GNorm = 0.4028, lr_0 = 6.8781e-04
Loss = 6.4614e-02, PNorm = 142.9040, GNorm = 0.8400, lr_0 = 6.8734e-04
Loss = 7.2406e-02, PNorm = 142.9674, GNorm = 0.5239, lr_0 = 6.8687e-04
Loss = 8.1990e-02, PNorm = 143.0355, GNorm = 0.6159, lr_0 = 6.8640e-04
Loss = 6.7058e-02, PNorm = 143.1064, GNorm = 0.5909, lr_0 = 6.8593e-04
Loss = 6.9419e-02, PNorm = 143.1725, GNorm = 0.7830, lr_0 = 6.8546e-04
Loss = 7.5881e-02, PNorm = 143.2365, GNorm = 0.5330, lr_0 = 6.8499e-04
Loss = 5.9495e-02, PNorm = 143.2953, GNorm = 0.3927, lr_0 = 6.8452e-04
Loss = 6.8469e-02, PNorm = 143.3616, GNorm = 0.6919, lr_0 = 6.8405e-04
Loss = 7.4419e-02, PNorm = 143.4254, GNorm = 0.6406, lr_0 = 6.8358e-04
Loss = 6.2889e-02, PNorm = 143.4964, GNorm = 0.4733, lr_0 = 6.8312e-04
Loss = 6.2206e-02, PNorm = 143.5609, GNorm = 0.6765, lr_0 = 6.8265e-04
Loss = 7.2333e-02, PNorm = 143.6255, GNorm = 0.5704, lr_0 = 6.8218e-04
Loss = 7.6956e-02, PNorm = 143.6925, GNorm = 0.5398, lr_0 = 6.8171e-04
Loss = 7.0435e-02, PNorm = 143.7532, GNorm = 0.8831, lr_0 = 6.8125e-04
Loss = 5.4113e-02, PNorm = 143.8245, GNorm = 0.5885, lr_0 = 6.8078e-04
Loss = 8.9462e-02, PNorm = 143.8953, GNorm = 0.5651, lr_0 = 6.8031e-04
Loss = 6.7931e-02, PNorm = 143.9698, GNorm = 0.6378, lr_0 = 6.7985e-04
Loss = 5.1804e-02, PNorm = 144.0406, GNorm = 0.4975, lr_0 = 6.7938e-04
Loss = 7.0260e-02, PNorm = 144.1022, GNorm = 0.6659, lr_0 = 6.7892e-04
Loss = 7.4156e-02, PNorm = 144.1728, GNorm = 0.5365, lr_0 = 6.7845e-04
Loss = 6.1315e-02, PNorm = 144.2429, GNorm = 0.3551, lr_0 = 6.7799e-04
Loss = 6.7221e-02, PNorm = 144.3079, GNorm = 0.6100, lr_0 = 6.7752e-04
Loss = 6.8420e-02, PNorm = 144.3742, GNorm = 1.4042, lr_0 = 6.7706e-04
Loss = 5.9117e-02, PNorm = 144.4422, GNorm = 0.4222, lr_0 = 6.7659e-04
Loss = 5.9218e-02, PNorm = 144.5056, GNorm = 0.5627, lr_0 = 6.7613e-04
Loss = 6.5194e-02, PNorm = 144.5659, GNorm = 0.4303, lr_0 = 6.7567e-04
Loss = 5.6524e-02, PNorm = 144.6364, GNorm = 0.5166, lr_0 = 6.7520e-04
Loss = 6.2374e-02, PNorm = 144.7050, GNorm = 1.1116, lr_0 = 6.7474e-04
Loss = 5.6488e-02, PNorm = 144.7643, GNorm = 0.6334, lr_0 = 6.7428e-04
Loss = 7.7951e-02, PNorm = 144.8244, GNorm = 0.3582, lr_0 = 6.7382e-04
Loss = 5.7388e-02, PNorm = 144.8859, GNorm = 0.3363, lr_0 = 6.7335e-04
Loss = 7.0489e-02, PNorm = 144.9505, GNorm = 0.4747, lr_0 = 6.7289e-04
Loss = 6.6417e-02, PNorm = 145.0156, GNorm = 0.5475, lr_0 = 6.7243e-04
Loss = 6.6020e-02, PNorm = 145.0835, GNorm = 1.1186, lr_0 = 6.7197e-04
Loss = 7.3091e-02, PNorm = 145.1515, GNorm = 0.6579, lr_0 = 6.7151e-04
Loss = 6.0200e-02, PNorm = 145.2268, GNorm = 0.5191, lr_0 = 6.7105e-04
Loss = 6.4398e-02, PNorm = 145.2972, GNorm = 0.9158, lr_0 = 6.7059e-04
Loss = 6.6927e-02, PNorm = 145.3698, GNorm = 0.7892, lr_0 = 6.7013e-04
Loss = 5.9911e-02, PNorm = 145.4368, GNorm = 0.6802, lr_0 = 6.6967e-04
Loss = 7.6404e-02, PNorm = 145.4992, GNorm = 0.3771, lr_0 = 6.6921e-04
Loss = 6.4063e-02, PNorm = 145.5643, GNorm = 0.4885, lr_0 = 6.6876e-04
Loss = 7.2774e-02, PNorm = 145.6264, GNorm = 0.9864, lr_0 = 6.6830e-04
Loss = 7.6286e-02, PNorm = 145.7018, GNorm = 0.6324, lr_0 = 6.6784e-04
Loss = 7.5951e-02, PNorm = 145.7727, GNorm = 0.8360, lr_0 = 6.6738e-04
Loss = 7.6334e-02, PNorm = 145.8464, GNorm = 0.7919, lr_0 = 6.6693e-04
Loss = 8.1948e-02, PNorm = 145.9142, GNorm = 0.4237, lr_0 = 6.6647e-04
Loss = 7.6641e-02, PNorm = 145.9883, GNorm = 0.9779, lr_0 = 6.6601e-04
Loss = 8.3627e-02, PNorm = 146.0588, GNorm = 0.6876, lr_0 = 6.6556e-04
Loss = 7.1624e-02, PNorm = 146.1331, GNorm = 0.6936, lr_0 = 6.6510e-04
Loss = 7.1104e-02, PNorm = 146.2059, GNorm = 0.5693, lr_0 = 6.6464e-04
Loss = 7.1418e-02, PNorm = 146.2845, GNorm = 0.5130, lr_0 = 6.6419e-04
Loss = 7.2045e-02, PNorm = 146.3667, GNorm = 0.7961, lr_0 = 6.6373e-04
Loss = 7.1452e-02, PNorm = 146.4488, GNorm = 0.7417, lr_0 = 6.6328e-04
Loss = 7.5863e-02, PNorm = 146.5353, GNorm = 0.7238, lr_0 = 6.6282e-04
Validation mae = 0.124262
Epoch 7
Loss = 5.4009e-02, PNorm = 146.6039, GNorm = 0.4185, lr_0 = 6.6237e-04
Loss = 4.7438e-02, PNorm = 146.6614, GNorm = 0.4844, lr_0 = 6.6192e-04
Loss = 5.3283e-02, PNorm = 146.7166, GNorm = 0.6761, lr_0 = 6.6146e-04
Loss = 4.6117e-02, PNorm = 146.7631, GNorm = 0.3519, lr_0 = 6.6101e-04
Loss = 5.2272e-02, PNorm = 146.8086, GNorm = 0.5348, lr_0 = 6.6056e-04
Loss = 4.7437e-02, PNorm = 146.8549, GNorm = 0.9811, lr_0 = 6.6011e-04
Loss = 5.2825e-02, PNorm = 146.8965, GNorm = 0.3144, lr_0 = 6.5965e-04
Loss = 4.9670e-02, PNorm = 146.9440, GNorm = 0.2847, lr_0 = 6.5920e-04
Loss = 4.8101e-02, PNorm = 146.9872, GNorm = 0.5536, lr_0 = 6.5875e-04
Loss = 5.2903e-02, PNorm = 147.0308, GNorm = 0.4534, lr_0 = 6.5830e-04
Loss = 4.9237e-02, PNorm = 147.0754, GNorm = 0.4757, lr_0 = 6.5785e-04
Loss = 4.4123e-02, PNorm = 147.1167, GNorm = 0.4802, lr_0 = 6.5740e-04
Loss = 4.4675e-02, PNorm = 147.1600, GNorm = 0.5863, lr_0 = 6.5695e-04
Loss = 4.9693e-02, PNorm = 147.2074, GNorm = 0.8578, lr_0 = 6.5650e-04
Loss = 6.1691e-02, PNorm = 147.2537, GNorm = 0.4278, lr_0 = 6.5605e-04
Loss = 6.3579e-02, PNorm = 147.2966, GNorm = 0.6985, lr_0 = 6.5560e-04
Loss = 4.0239e-02, PNorm = 147.3481, GNorm = 0.3868, lr_0 = 6.5515e-04
Loss = 4.9437e-02, PNorm = 147.4007, GNorm = 0.7270, lr_0 = 6.5470e-04
Loss = 4.2029e-02, PNorm = 147.4478, GNorm = 0.4458, lr_0 = 6.5425e-04
Loss = 4.4951e-02, PNorm = 147.4958, GNorm = 0.5853, lr_0 = 6.5380e-04
Loss = 3.5759e-02, PNorm = 147.5383, GNorm = 0.4868, lr_0 = 6.5335e-04
Loss = 4.3528e-02, PNorm = 147.5789, GNorm = 0.3626, lr_0 = 6.5291e-04
Loss = 5.4679e-02, PNorm = 147.6281, GNorm = 0.7362, lr_0 = 6.5246e-04
Loss = 4.2615e-02, PNorm = 147.6727, GNorm = 0.4599, lr_0 = 6.5201e-04
Loss = 4.5698e-02, PNorm = 147.7225, GNorm = 0.3727, lr_0 = 6.5157e-04
Loss = 4.3396e-02, PNorm = 147.7625, GNorm = 0.4171, lr_0 = 6.5112e-04
Loss = 5.0570e-02, PNorm = 147.8013, GNorm = 0.4541, lr_0 = 6.5067e-04
Loss = 4.4476e-02, PNorm = 147.8427, GNorm = 0.7592, lr_0 = 6.5023e-04
Loss = 4.8776e-02, PNorm = 147.8862, GNorm = 0.3828, lr_0 = 6.4978e-04
Loss = 4.6455e-02, PNorm = 147.9372, GNorm = 0.4279, lr_0 = 6.4934e-04
Loss = 5.9483e-02, PNorm = 147.9904, GNorm = 0.5248, lr_0 = 6.4889e-04
Loss = 4.4530e-02, PNorm = 148.0418, GNorm = 0.6844, lr_0 = 6.4845e-04
Loss = 4.1565e-02, PNorm = 148.0918, GNorm = 0.5505, lr_0 = 6.4800e-04
Loss = 6.2032e-02, PNorm = 148.1373, GNorm = 1.3807, lr_0 = 6.4756e-04
Loss = 5.3665e-02, PNorm = 148.1891, GNorm = 0.5367, lr_0 = 6.4712e-04
Loss = 4.5659e-02, PNorm = 148.2357, GNorm = 0.7291, lr_0 = 6.4667e-04
Loss = 5.5980e-02, PNorm = 148.2823, GNorm = 0.4011, lr_0 = 6.4623e-04
Loss = 4.4307e-02, PNorm = 148.3289, GNorm = 0.3108, lr_0 = 6.4579e-04
Loss = 4.3175e-02, PNorm = 148.3827, GNorm = 0.4165, lr_0 = 6.4534e-04
Loss = 5.7940e-02, PNorm = 148.4339, GNorm = 0.4773, lr_0 = 6.4490e-04
Loss = 4.8903e-02, PNorm = 148.4809, GNorm = 0.7223, lr_0 = 6.4446e-04
Loss = 4.9754e-02, PNorm = 148.5317, GNorm = 0.5483, lr_0 = 6.4402e-04
Loss = 5.9353e-02, PNorm = 148.5913, GNorm = 0.4335, lr_0 = 6.4358e-04
Loss = 4.0619e-02, PNorm = 148.6396, GNorm = 0.3378, lr_0 = 6.4314e-04
Loss = 4.5158e-02, PNorm = 148.6894, GNorm = 0.3907, lr_0 = 6.4270e-04
Loss = 4.0038e-02, PNorm = 148.7385, GNorm = 0.3442, lr_0 = 6.4226e-04
Loss = 4.8202e-02, PNorm = 148.7905, GNorm = 0.3672, lr_0 = 6.4182e-04
Loss = 5.1834e-02, PNorm = 148.8373, GNorm = 0.5918, lr_0 = 6.4138e-04
Loss = 5.1744e-02, PNorm = 148.8864, GNorm = 0.7646, lr_0 = 6.4094e-04
Loss = 6.0378e-02, PNorm = 148.9415, GNorm = 0.8786, lr_0 = 6.4050e-04
Loss = 4.4238e-02, PNorm = 148.9966, GNorm = 0.7292, lr_0 = 6.4006e-04
Loss = 5.1828e-02, PNorm = 149.0583, GNorm = 0.4785, lr_0 = 6.3962e-04
Loss = 4.5173e-02, PNorm = 149.1112, GNorm = 0.4120, lr_0 = 6.3918e-04
Loss = 4.4366e-02, PNorm = 149.1710, GNorm = 0.3823, lr_0 = 6.3874e-04
Loss = 4.4996e-02, PNorm = 149.2252, GNorm = 1.1320, lr_0 = 6.3831e-04
Loss = 5.2411e-02, PNorm = 149.2833, GNorm = 0.5119, lr_0 = 6.3787e-04
Loss = 4.9395e-02, PNorm = 149.3345, GNorm = 0.4291, lr_0 = 6.3743e-04
Loss = 5.2700e-02, PNorm = 149.3864, GNorm = 0.5746, lr_0 = 6.3700e-04
Loss = 4.9357e-02, PNorm = 149.4399, GNorm = 0.6212, lr_0 = 6.3656e-04
Loss = 4.5654e-02, PNorm = 149.4886, GNorm = 0.5257, lr_0 = 6.3612e-04
Loss = 5.5908e-02, PNorm = 149.5460, GNorm = 0.8196, lr_0 = 6.3569e-04
Loss = 6.3474e-02, PNorm = 149.6045, GNorm = 0.6522, lr_0 = 6.3525e-04
Loss = 4.4919e-02, PNorm = 149.6551, GNorm = 0.3329, lr_0 = 6.3482e-04
Loss = 5.6657e-02, PNorm = 149.7004, GNorm = 0.5450, lr_0 = 6.3438e-04
Loss = 6.3056e-02, PNorm = 149.7526, GNorm = 1.2501, lr_0 = 6.3395e-04
Loss = 4.4381e-02, PNorm = 149.8024, GNorm = 0.2716, lr_0 = 6.3351e-04
Loss = 4.6500e-02, PNorm = 149.8482, GNorm = 0.5639, lr_0 = 6.3308e-04
Loss = 5.4584e-02, PNorm = 149.9043, GNorm = 0.4455, lr_0 = 6.3265e-04
Loss = 4.6853e-02, PNorm = 149.9582, GNorm = 0.3111, lr_0 = 6.3221e-04
Loss = 4.9752e-02, PNorm = 150.0142, GNorm = 0.9489, lr_0 = 6.3178e-04
Loss = 4.6637e-02, PNorm = 150.0670, GNorm = 0.8760, lr_0 = 6.3135e-04
Loss = 5.3945e-02, PNorm = 150.1179, GNorm = 0.5193, lr_0 = 6.3091e-04
Loss = 5.4441e-02, PNorm = 150.1730, GNorm = 0.7154, lr_0 = 6.3048e-04
Loss = 5.2438e-02, PNorm = 150.2282, GNorm = 0.3127, lr_0 = 6.3005e-04
Loss = 4.8263e-02, PNorm = 150.2792, GNorm = 0.4153, lr_0 = 6.2962e-04
Loss = 5.0809e-02, PNorm = 150.3360, GNorm = 0.7808, lr_0 = 6.2919e-04
Loss = 4.7510e-02, PNorm = 150.3911, GNorm = 0.6509, lr_0 = 6.2876e-04
Loss = 4.7473e-02, PNorm = 150.4518, GNorm = 0.3052, lr_0 = 6.2833e-04
Loss = 3.8323e-02, PNorm = 150.5002, GNorm = 0.3643, lr_0 = 6.2789e-04
Loss = 5.1550e-02, PNorm = 150.5545, GNorm = 0.3673, lr_0 = 6.2746e-04
Loss = 4.0714e-02, PNorm = 150.6082, GNorm = 0.3968, lr_0 = 6.2703e-04
Loss = 5.8084e-02, PNorm = 150.6596, GNorm = 0.5444, lr_0 = 6.2661e-04
Loss = 4.0322e-02, PNorm = 150.7064, GNorm = 0.3630, lr_0 = 6.2618e-04
Loss = 5.5408e-02, PNorm = 150.7609, GNorm = 0.4826, lr_0 = 6.2575e-04
Loss = 5.5818e-02, PNorm = 150.8157, GNorm = 0.4229, lr_0 = 6.2532e-04
Loss = 5.8746e-02, PNorm = 150.8687, GNorm = 1.0642, lr_0 = 6.2489e-04
Loss = 4.4647e-02, PNorm = 150.9258, GNorm = 0.5920, lr_0 = 6.2446e-04
Loss = 5.1583e-02, PNorm = 150.9822, GNorm = 0.3545, lr_0 = 6.2403e-04
Loss = 5.5542e-02, PNorm = 151.0405, GNorm = 0.4714, lr_0 = 6.2361e-04
Loss = 5.1193e-02, PNorm = 151.0923, GNorm = 0.5220, lr_0 = 6.2318e-04
Loss = 5.6806e-02, PNorm = 151.1442, GNorm = 0.5096, lr_0 = 6.2275e-04
Loss = 4.7391e-02, PNorm = 151.1938, GNorm = 0.3195, lr_0 = 6.2233e-04
Loss = 4.8718e-02, PNorm = 151.2471, GNorm = 0.4060, lr_0 = 6.2190e-04
Loss = 4.3653e-02, PNorm = 151.2932, GNorm = 0.3556, lr_0 = 6.2147e-04
Loss = 5.9546e-02, PNorm = 151.3458, GNorm = 0.9873, lr_0 = 6.2105e-04
Loss = 4.3993e-02, PNorm = 151.3974, GNorm = 0.6216, lr_0 = 6.2062e-04
Loss = 5.5577e-02, PNorm = 151.4553, GNorm = 0.5668, lr_0 = 6.2020e-04
Loss = 5.0721e-02, PNorm = 151.5116, GNorm = 0.4093, lr_0 = 6.1977e-04
Loss = 5.5432e-02, PNorm = 151.5679, GNorm = 0.5312, lr_0 = 6.1935e-04
Loss = 5.3642e-02, PNorm = 151.6246, GNorm = 0.4607, lr_0 = 6.1892e-04
Loss = 5.1828e-02, PNorm = 151.6854, GNorm = 0.6591, lr_0 = 6.1850e-04
Loss = 6.5499e-02, PNorm = 151.7472, GNorm = 0.5806, lr_0 = 6.1808e-04
Loss = 4.9690e-02, PNorm = 151.8095, GNorm = 0.5117, lr_0 = 6.1765e-04
Loss = 5.9019e-02, PNorm = 151.8720, GNorm = 0.5597, lr_0 = 6.1723e-04
Loss = 5.2462e-02, PNorm = 151.9413, GNorm = 0.3475, lr_0 = 6.1681e-04
Loss = 5.0086e-02, PNorm = 152.0083, GNorm = 0.4820, lr_0 = 6.1638e-04
Loss = 6.1017e-02, PNorm = 152.0668, GNorm = 0.7030, lr_0 = 6.1596e-04
Loss = 5.2221e-02, PNorm = 152.1292, GNorm = 0.6847, lr_0 = 6.1554e-04
Loss = 5.3756e-02, PNorm = 152.1944, GNorm = 0.4580, lr_0 = 6.1512e-04
Loss = 4.6489e-02, PNorm = 152.2524, GNorm = 0.9171, lr_0 = 6.1470e-04
Loss = 6.3590e-02, PNorm = 152.3160, GNorm = 0.4697, lr_0 = 6.1428e-04
Loss = 5.9950e-02, PNorm = 152.3815, GNorm = 0.6393, lr_0 = 6.1385e-04
Loss = 6.0002e-02, PNorm = 152.4464, GNorm = 0.3878, lr_0 = 6.1343e-04
Loss = 4.7839e-02, PNorm = 152.5082, GNorm = 0.3961, lr_0 = 6.1301e-04
Loss = 4.6745e-02, PNorm = 152.5599, GNorm = 0.3463, lr_0 = 6.1259e-04
Loss = 5.1566e-02, PNorm = 152.6122, GNorm = 0.6597, lr_0 = 6.1217e-04
Loss = 6.0552e-02, PNorm = 152.6675, GNorm = 1.7607, lr_0 = 6.1175e-04
Loss = 5.1705e-02, PNorm = 152.7271, GNorm = 0.3337, lr_0 = 6.1134e-04
Loss = 5.3486e-02, PNorm = 152.7868, GNorm = 0.6898, lr_0 = 6.1092e-04
Loss = 5.2827e-02, PNorm = 152.8428, GNorm = 0.6759, lr_0 = 6.1050e-04
Validation mae = 0.123286
Epoch 8
Loss = 3.7114e-02, PNorm = 152.8902, GNorm = 0.7108, lr_0 = 6.1008e-04
Loss = 5.0352e-02, PNorm = 152.9362, GNorm = 0.2638, lr_0 = 6.0966e-04
Loss = 4.6120e-02, PNorm = 152.9801, GNorm = 0.5166, lr_0 = 6.0924e-04
Loss = 4.2161e-02, PNorm = 153.0220, GNorm = 0.3548, lr_0 = 6.0883e-04
Loss = 4.9184e-02, PNorm = 153.0713, GNorm = 1.5615, lr_0 = 6.0841e-04
Loss = 4.3333e-02, PNorm = 153.1089, GNorm = 0.4533, lr_0 = 6.0799e-04
Loss = 4.3080e-02, PNorm = 153.1476, GNorm = 0.3167, lr_0 = 6.0758e-04
Loss = 3.8306e-02, PNorm = 153.1819, GNorm = 0.6086, lr_0 = 6.0716e-04
Loss = 4.8068e-02, PNorm = 153.2180, GNorm = 0.4567, lr_0 = 6.0674e-04
Loss = 4.7755e-02, PNorm = 153.2550, GNorm = 0.2397, lr_0 = 6.0633e-04
Loss = 4.1082e-02, PNorm = 153.3013, GNorm = 0.3241, lr_0 = 6.0591e-04
Loss = 4.1273e-02, PNorm = 153.3370, GNorm = 0.5356, lr_0 = 6.0550e-04
Loss = 3.9767e-02, PNorm = 153.3783, GNorm = 0.3637, lr_0 = 6.0508e-04
Loss = 3.8467e-02, PNorm = 153.4183, GNorm = 0.3548, lr_0 = 6.0467e-04
Loss = 3.9951e-02, PNorm = 153.4611, GNorm = 0.3779, lr_0 = 6.0425e-04
Loss = 3.9565e-02, PNorm = 153.4947, GNorm = 0.3250, lr_0 = 6.0384e-04
Loss = 3.9122e-02, PNorm = 153.5321, GNorm = 0.3370, lr_0 = 6.0343e-04
Loss = 4.4646e-02, PNorm = 153.5693, GNorm = 0.4527, lr_0 = 6.0301e-04
Loss = 3.9581e-02, PNorm = 153.6121, GNorm = 0.5357, lr_0 = 6.0260e-04
Loss = 3.9884e-02, PNorm = 153.6528, GNorm = 0.6315, lr_0 = 6.0219e-04
Loss = 3.4526e-02, PNorm = 153.6924, GNorm = 0.2629, lr_0 = 6.0178e-04
Loss = 3.7287e-02, PNorm = 153.7292, GNorm = 0.3464, lr_0 = 6.0136e-04
Loss = 3.7480e-02, PNorm = 153.7647, GNorm = 0.4012, lr_0 = 6.0095e-04
Loss = 3.5943e-02, PNorm = 153.8009, GNorm = 0.3344, lr_0 = 6.0054e-04
Loss = 3.8113e-02, PNorm = 153.8353, GNorm = 0.3537, lr_0 = 6.0013e-04
Loss = 5.1110e-02, PNorm = 153.8808, GNorm = 0.7628, lr_0 = 5.9972e-04
Loss = 4.5412e-02, PNorm = 153.9240, GNorm = 0.5486, lr_0 = 5.9931e-04
Loss = 3.8163e-02, PNorm = 153.9660, GNorm = 0.5637, lr_0 = 5.9890e-04
Loss = 4.3184e-02, PNorm = 154.0139, GNorm = 0.9619, lr_0 = 5.9849e-04
Loss = 3.8407e-02, PNorm = 154.0538, GNorm = 0.6805, lr_0 = 5.9808e-04
Loss = 3.8596e-02, PNorm = 154.1003, GNorm = 0.5998, lr_0 = 5.9767e-04
Loss = 4.0863e-02, PNorm = 154.1443, GNorm = 0.4187, lr_0 = 5.9726e-04
Loss = 4.0468e-02, PNorm = 154.1873, GNorm = 0.4039, lr_0 = 5.9685e-04
Loss = 4.2686e-02, PNorm = 154.2219, GNorm = 0.6443, lr_0 = 5.9644e-04
Loss = 4.0371e-02, PNorm = 154.2654, GNorm = 0.3383, lr_0 = 5.9603e-04
Loss = 3.2273e-02, PNorm = 154.3067, GNorm = 0.4065, lr_0 = 5.9562e-04
Loss = 4.2575e-02, PNorm = 154.3519, GNorm = 0.3708, lr_0 = 5.9521e-04
Loss = 3.9453e-02, PNorm = 154.4006, GNorm = 0.4675, lr_0 = 5.9481e-04
Loss = 3.7861e-02, PNorm = 154.4467, GNorm = 0.4935, lr_0 = 5.9440e-04
Loss = 3.5273e-02, PNorm = 154.4891, GNorm = 0.4766, lr_0 = 5.9399e-04
Loss = 4.7321e-02, PNorm = 154.5300, GNorm = 0.4070, lr_0 = 5.9358e-04
Loss = 3.4107e-02, PNorm = 154.5718, GNorm = 0.8692, lr_0 = 5.9318e-04
Loss = 4.1016e-02, PNorm = 154.6163, GNorm = 0.2820, lr_0 = 5.9277e-04
Loss = 3.3589e-02, PNorm = 154.6560, GNorm = 0.4587, lr_0 = 5.9236e-04
Loss = 3.7829e-02, PNorm = 154.6985, GNorm = 0.3477, lr_0 = 5.9196e-04
Loss = 4.5978e-02, PNorm = 154.7405, GNorm = 0.8900, lr_0 = 5.9155e-04
Loss = 3.0995e-02, PNorm = 154.7855, GNorm = 0.2543, lr_0 = 5.9115e-04
Loss = 3.4063e-02, PNorm = 154.8276, GNorm = 0.6089, lr_0 = 5.9074e-04
Loss = 4.1144e-02, PNorm = 154.8674, GNorm = 0.3883, lr_0 = 5.9034e-04
Loss = 3.6985e-02, PNorm = 154.9078, GNorm = 0.3965, lr_0 = 5.8993e-04
Loss = 3.6395e-02, PNorm = 154.9543, GNorm = 0.5898, lr_0 = 5.8953e-04
Loss = 4.5276e-02, PNorm = 155.0000, GNorm = 0.8147, lr_0 = 5.8913e-04
Loss = 4.1471e-02, PNorm = 155.0492, GNorm = 0.3346, lr_0 = 5.8872e-04
Loss = 3.9839e-02, PNorm = 155.0955, GNorm = 0.4389, lr_0 = 5.8832e-04
Loss = 4.0173e-02, PNorm = 155.1381, GNorm = 0.3990, lr_0 = 5.8792e-04
Loss = 4.3102e-02, PNorm = 155.1804, GNorm = 0.7024, lr_0 = 5.8751e-04
Loss = 3.7464e-02, PNorm = 155.2240, GNorm = 0.4999, lr_0 = 5.8711e-04
Loss = 4.2495e-02, PNorm = 155.2686, GNorm = 0.6353, lr_0 = 5.8671e-04
Loss = 3.8634e-02, PNorm = 155.3126, GNorm = 0.2696, lr_0 = 5.8631e-04
Loss = 4.4142e-02, PNorm = 155.3524, GNorm = 0.4039, lr_0 = 5.8591e-04
Loss = 3.0526e-02, PNorm = 155.3969, GNorm = 0.3028, lr_0 = 5.8550e-04
Loss = 8.1308e-02, PNorm = 155.4386, GNorm = 0.7150, lr_0 = 5.8510e-04
Loss = 4.8108e-02, PNorm = 155.4875, GNorm = 0.6820, lr_0 = 5.8470e-04
Loss = 4.3149e-02, PNorm = 155.5339, GNorm = 0.3740, lr_0 = 5.8430e-04
Loss = 4.0959e-02, PNorm = 155.5819, GNorm = 0.7089, lr_0 = 5.8390e-04
Loss = 3.8040e-02, PNorm = 155.6306, GNorm = 0.3178, lr_0 = 5.8350e-04
Loss = 3.7179e-02, PNorm = 155.6750, GNorm = 0.4645, lr_0 = 5.8310e-04
Loss = 3.5502e-02, PNorm = 155.7187, GNorm = 0.4889, lr_0 = 5.8270e-04
Loss = 3.8862e-02, PNorm = 155.7661, GNorm = 0.6684, lr_0 = 5.8230e-04
Loss = 3.4030e-02, PNorm = 155.8094, GNorm = 0.3512, lr_0 = 5.8190e-04
Loss = 4.1864e-02, PNorm = 155.8541, GNorm = 0.4614, lr_0 = 5.8151e-04
Loss = 4.7807e-02, PNorm = 155.8972, GNorm = 0.4049, lr_0 = 5.8111e-04
Loss = 3.7174e-02, PNorm = 155.9454, GNorm = 0.4897, lr_0 = 5.8071e-04
Loss = 5.1929e-02, PNorm = 155.9868, GNorm = 0.6492, lr_0 = 5.8031e-04
Loss = 3.4310e-02, PNorm = 156.0332, GNorm = 0.3422, lr_0 = 5.7991e-04
Loss = 3.9532e-02, PNorm = 156.0809, GNorm = 0.2721, lr_0 = 5.7952e-04
Loss = 3.8741e-02, PNorm = 156.1281, GNorm = 0.4059, lr_0 = 5.7912e-04
Loss = 3.5279e-02, PNorm = 156.1688, GNorm = 0.4412, lr_0 = 5.7872e-04
Loss = 4.5909e-02, PNorm = 156.2071, GNorm = 0.5525, lr_0 = 5.7833e-04
Loss = 4.5890e-02, PNorm = 156.2493, GNorm = 0.7214, lr_0 = 5.7793e-04
Loss = 3.9779e-02, PNorm = 156.2988, GNorm = 0.4921, lr_0 = 5.7753e-04
Loss = 3.8005e-02, PNorm = 156.3404, GNorm = 0.4118, lr_0 = 5.7714e-04
Loss = 4.2724e-02, PNorm = 156.3850, GNorm = 0.4264, lr_0 = 5.7674e-04
Loss = 3.9720e-02, PNorm = 156.4356, GNorm = 0.6896, lr_0 = 5.7635e-04
Loss = 3.9350e-02, PNorm = 156.4842, GNorm = 0.4260, lr_0 = 5.7595e-04
Loss = 3.6719e-02, PNorm = 156.5355, GNorm = 0.5388, lr_0 = 5.7556e-04
Loss = 3.8644e-02, PNorm = 156.5857, GNorm = 0.6108, lr_0 = 5.7516e-04
Loss = 3.8293e-02, PNorm = 156.6354, GNorm = 0.5541, lr_0 = 5.7477e-04
Loss = 4.4228e-02, PNorm = 156.6833, GNorm = 0.2843, lr_0 = 5.7438e-04
Loss = 3.6689e-02, PNorm = 156.7308, GNorm = 0.4312, lr_0 = 5.7398e-04
Loss = 3.1804e-02, PNorm = 156.7744, GNorm = 0.9777, lr_0 = 5.7359e-04
Loss = 4.1370e-02, PNorm = 156.8163, GNorm = 0.2672, lr_0 = 5.7320e-04
Loss = 3.6948e-02, PNorm = 156.8599, GNorm = 0.3743, lr_0 = 5.7280e-04
Loss = 3.7018e-02, PNorm = 156.9048, GNorm = 0.4218, lr_0 = 5.7241e-04
Loss = 4.1022e-02, PNorm = 156.9558, GNorm = 0.5365, lr_0 = 5.7202e-04
Loss = 3.6725e-02, PNorm = 156.9998, GNorm = 0.2871, lr_0 = 5.7163e-04
Loss = 5.3025e-02, PNorm = 157.0451, GNorm = 0.9250, lr_0 = 5.7124e-04
Loss = 3.7460e-02, PNorm = 157.0904, GNorm = 0.3315, lr_0 = 5.7084e-04
Loss = 3.0608e-02, PNorm = 157.1372, GNorm = 0.3922, lr_0 = 5.7045e-04
Loss = 3.6960e-02, PNorm = 157.1784, GNorm = 0.3702, lr_0 = 5.7006e-04
Loss = 3.5627e-02, PNorm = 157.2245, GNorm = 0.2670, lr_0 = 5.6967e-04
Loss = 3.8790e-02, PNorm = 157.2707, GNorm = 0.5213, lr_0 = 5.6928e-04
Loss = 4.7065e-02, PNorm = 157.3113, GNorm = 0.8404, lr_0 = 5.6889e-04
Loss = 4.1715e-02, PNorm = 157.3620, GNorm = 0.7240, lr_0 = 5.6850e-04
Loss = 4.7508e-02, PNorm = 157.4232, GNorm = 0.3351, lr_0 = 5.6811e-04
Loss = 4.2906e-02, PNorm = 157.4803, GNorm = 0.4780, lr_0 = 5.6772e-04
Loss = 4.4886e-02, PNorm = 157.5387, GNorm = 0.4112, lr_0 = 5.6733e-04
Loss = 4.3999e-02, PNorm = 157.5868, GNorm = 0.6076, lr_0 = 5.6695e-04
Loss = 4.1449e-02, PNorm = 157.6403, GNorm = 0.5643, lr_0 = 5.6656e-04
Loss = 4.3034e-02, PNorm = 157.6855, GNorm = 0.4476, lr_0 = 5.6617e-04
Loss = 3.9956e-02, PNorm = 157.7398, GNorm = 0.5012, lr_0 = 5.6578e-04
Loss = 4.1833e-02, PNorm = 157.7937, GNorm = 0.3593, lr_0 = 5.6539e-04
Loss = 3.3822e-02, PNorm = 157.8490, GNorm = 0.3293, lr_0 = 5.6501e-04
Loss = 4.6977e-02, PNorm = 157.9042, GNorm = 0.2888, lr_0 = 5.6462e-04
Loss = 3.6063e-02, PNorm = 157.9576, GNorm = 0.3939, lr_0 = 5.6423e-04
Loss = 3.7548e-02, PNorm = 158.0081, GNorm = 0.4347, lr_0 = 5.6385e-04
Loss = 4.8613e-02, PNorm = 158.0565, GNorm = 0.6308, lr_0 = 5.6346e-04
Loss = 4.5720e-02, PNorm = 158.1085, GNorm = 0.4644, lr_0 = 5.6307e-04
Loss = 4.1921e-02, PNorm = 158.1625, GNorm = 0.3299, lr_0 = 5.6269e-04
Loss = 5.3814e-02, PNorm = 158.2180, GNorm = 1.7410, lr_0 = 5.6230e-04
Validation mae = 0.123126
Epoch 9
Loss = 3.9118e-02, PNorm = 158.2572, GNorm = 0.9331, lr_0 = 5.6192e-04
Loss = 4.7318e-02, PNorm = 158.2993, GNorm = 0.4210, lr_0 = 5.6153e-04
Loss = 3.5528e-02, PNorm = 158.3411, GNorm = 0.2701, lr_0 = 5.6115e-04
Loss = 3.6426e-02, PNorm = 158.3797, GNorm = 0.3922, lr_0 = 5.6076e-04
Loss = 3.1821e-02, PNorm = 158.4114, GNorm = 0.2848, lr_0 = 5.6038e-04
Loss = 3.5019e-02, PNorm = 158.4410, GNorm = 0.2805, lr_0 = 5.6000e-04
Loss = 2.8081e-02, PNorm = 158.4744, GNorm = 0.2644, lr_0 = 5.5961e-04
Loss = 3.6087e-02, PNorm = 158.5123, GNorm = 0.3452, lr_0 = 5.5923e-04
Loss = 3.6147e-02, PNorm = 158.5467, GNorm = 0.4423, lr_0 = 5.5885e-04
Loss = 3.2267e-02, PNorm = 158.5810, GNorm = 0.6791, lr_0 = 5.5846e-04
Loss = 3.7704e-02, PNorm = 158.6151, GNorm = 0.3219, lr_0 = 5.5808e-04
Loss = 2.9285e-02, PNorm = 158.6433, GNorm = 0.2530, lr_0 = 5.5770e-04
Loss = 3.2705e-02, PNorm = 158.6802, GNorm = 0.4924, lr_0 = 5.5732e-04
Loss = 3.2768e-02, PNorm = 158.7100, GNorm = 0.2940, lr_0 = 5.5693e-04
Loss = 3.5392e-02, PNorm = 158.7429, GNorm = 0.3329, lr_0 = 5.5655e-04
Loss = 2.9810e-02, PNorm = 158.7788, GNorm = 0.2551, lr_0 = 5.5617e-04
Loss = 3.1749e-02, PNorm = 158.8152, GNorm = 0.3539, lr_0 = 5.5579e-04
Loss = 3.8221e-02, PNorm = 158.8485, GNorm = 0.6616, lr_0 = 5.5541e-04
Loss = 4.0194e-02, PNorm = 158.8874, GNorm = 0.6181, lr_0 = 5.5503e-04
Loss = 3.2598e-02, PNorm = 158.9159, GNorm = 0.4901, lr_0 = 5.5465e-04
Loss = 4.9694e-02, PNorm = 158.9532, GNorm = 0.8992, lr_0 = 5.5427e-04
Loss = 3.1256e-02, PNorm = 158.9872, GNorm = 0.3722, lr_0 = 5.5389e-04
Loss = 3.3009e-02, PNorm = 159.0208, GNorm = 0.5258, lr_0 = 5.5351e-04
Loss = 3.4364e-02, PNorm = 159.0562, GNorm = 0.3654, lr_0 = 5.5313e-04
Loss = 3.4553e-02, PNorm = 159.0912, GNorm = 0.2637, lr_0 = 5.5275e-04
Loss = 3.3808e-02, PNorm = 159.1308, GNorm = 0.3139, lr_0 = 5.5237e-04
Loss = 2.9070e-02, PNorm = 159.1722, GNorm = 0.6596, lr_0 = 5.5199e-04
Loss = 3.8658e-02, PNorm = 159.2120, GNorm = 0.5011, lr_0 = 5.5162e-04
Loss = 3.1192e-02, PNorm = 159.2468, GNorm = 0.5224, lr_0 = 5.5124e-04
Loss = 2.9867e-02, PNorm = 159.2847, GNorm = 0.8230, lr_0 = 5.5086e-04
Loss = 2.3946e-02, PNorm = 159.3231, GNorm = 0.5168, lr_0 = 5.5048e-04
Loss = 2.8085e-02, PNorm = 159.3533, GNorm = 0.2324, lr_0 = 5.5011e-04
Loss = 2.5986e-02, PNorm = 159.3864, GNorm = 0.3121, lr_0 = 5.4973e-04
Loss = 2.9806e-02, PNorm = 159.4185, GNorm = 0.2715, lr_0 = 5.4935e-04
Loss = 3.5315e-02, PNorm = 159.4526, GNorm = 0.4714, lr_0 = 5.4898e-04
Loss = 2.7031e-02, PNorm = 159.4899, GNorm = 0.5692, lr_0 = 5.4860e-04
Loss = 2.9923e-02, PNorm = 159.5240, GNorm = 0.4733, lr_0 = 5.4822e-04
Loss = 2.8920e-02, PNorm = 159.5593, GNorm = 0.4778, lr_0 = 5.4785e-04
Loss = 2.4723e-02, PNorm = 159.5897, GNorm = 0.3476, lr_0 = 5.4747e-04
Loss = 3.2216e-02, PNorm = 159.6223, GNorm = 0.3778, lr_0 = 5.4710e-04
Loss = 3.1544e-02, PNorm = 159.6552, GNorm = 0.3640, lr_0 = 5.4672e-04
Loss = 2.5621e-02, PNorm = 159.6884, GNorm = 0.5994, lr_0 = 5.4635e-04
Loss = 3.0582e-02, PNorm = 159.7220, GNorm = 0.5766, lr_0 = 5.4597e-04
Loss = 3.6853e-02, PNorm = 159.7592, GNorm = 0.5250, lr_0 = 5.4560e-04
Loss = 2.9920e-02, PNorm = 159.7965, GNorm = 0.2779, lr_0 = 5.4523e-04
Loss = 2.7701e-02, PNorm = 159.8335, GNorm = 0.3067, lr_0 = 5.4485e-04
Loss = 4.0299e-02, PNorm = 159.8710, GNorm = 0.2581, lr_0 = 5.4448e-04
Loss = 3.5997e-02, PNorm = 159.9099, GNorm = 0.3257, lr_0 = 5.4411e-04
Loss = 3.7062e-02, PNorm = 159.9458, GNorm = 0.2584, lr_0 = 5.4373e-04
Loss = 3.1064e-02, PNorm = 159.9839, GNorm = 0.4787, lr_0 = 5.4336e-04
Loss = 3.2601e-02, PNorm = 160.0180, GNorm = 0.8129, lr_0 = 5.4299e-04
Loss = 3.7839e-02, PNorm = 160.0575, GNorm = 0.7259, lr_0 = 5.4262e-04
Loss = 3.9184e-02, PNorm = 160.0939, GNorm = 0.3646, lr_0 = 5.4225e-04
Loss = 3.9176e-02, PNorm = 160.1377, GNorm = 0.3890, lr_0 = 5.4187e-04
Loss = 3.2942e-02, PNorm = 160.1814, GNorm = 0.2156, lr_0 = 5.4150e-04
Loss = 3.0202e-02, PNorm = 160.2245, GNorm = 0.2469, lr_0 = 5.4113e-04
Loss = 3.8275e-02, PNorm = 160.2705, GNorm = 0.3545, lr_0 = 5.4076e-04
Loss = 3.7636e-02, PNorm = 160.3084, GNorm = 0.2926, lr_0 = 5.4039e-04
Loss = 3.1556e-02, PNorm = 160.3514, GNorm = 0.5294, lr_0 = 5.4002e-04
Loss = 3.3375e-02, PNorm = 160.3894, GNorm = 0.5513, lr_0 = 5.3965e-04
Loss = 3.4678e-02, PNorm = 160.4363, GNorm = 0.2852, lr_0 = 5.3928e-04
Loss = 3.1568e-02, PNorm = 160.4775, GNorm = 0.3560, lr_0 = 5.3891e-04
Loss = 3.7186e-02, PNorm = 160.5202, GNorm = 0.3218, lr_0 = 5.3854e-04
Loss = 4.5555e-02, PNorm = 160.5648, GNorm = 0.6278, lr_0 = 5.3817e-04
Loss = 3.1439e-02, PNorm = 160.6087, GNorm = 0.4156, lr_0 = 5.3781e-04
Loss = 3.8006e-02, PNorm = 160.6494, GNorm = 0.2550, lr_0 = 5.3744e-04
Loss = 3.8165e-02, PNorm = 160.6886, GNorm = 0.5508, lr_0 = 5.3707e-04
Loss = 3.1062e-02, PNorm = 160.7270, GNorm = 0.3883, lr_0 = 5.3670e-04
Loss = 5.3349e-02, PNorm = 160.7732, GNorm = 0.3821, lr_0 = 5.3633e-04
Loss = 2.9344e-02, PNorm = 160.8108, GNorm = 0.4419, lr_0 = 5.3597e-04
Loss = 3.0000e-02, PNorm = 160.8489, GNorm = 0.2269, lr_0 = 5.3560e-04
Loss = 2.8421e-02, PNorm = 160.8810, GNorm = 0.3407, lr_0 = 5.3523e-04
Loss = 3.4632e-02, PNorm = 160.9132, GNorm = 0.3695, lr_0 = 5.3486e-04
Loss = 3.0688e-02, PNorm = 160.9491, GNorm = 0.3090, lr_0 = 5.3450e-04
Loss = 4.3000e-02, PNorm = 160.9866, GNorm = 0.4663, lr_0 = 5.3413e-04
Loss = 2.9510e-02, PNorm = 161.0318, GNorm = 0.3100, lr_0 = 5.3377e-04
Loss = 3.5379e-02, PNorm = 161.0732, GNorm = 0.2737, lr_0 = 5.3340e-04
Loss = 3.2724e-02, PNorm = 161.1133, GNorm = 0.3011, lr_0 = 5.3304e-04
Loss = 2.8969e-02, PNorm = 161.1519, GNorm = 0.3679, lr_0 = 5.3267e-04
Loss = 3.0001e-02, PNorm = 161.1898, GNorm = 0.2019, lr_0 = 5.3231e-04
Loss = 3.7996e-02, PNorm = 161.2276, GNorm = 0.3096, lr_0 = 5.3194e-04
Loss = 2.9038e-02, PNorm = 161.2660, GNorm = 0.3854, lr_0 = 5.3158e-04
Loss = 2.9984e-02, PNorm = 161.3091, GNorm = 0.3740, lr_0 = 5.3121e-04
Loss = 3.4998e-02, PNorm = 161.3512, GNorm = 0.3133, lr_0 = 5.3085e-04
Loss = 4.2446e-02, PNorm = 161.3927, GNorm = 0.4603, lr_0 = 5.3048e-04
Loss = 2.9800e-02, PNorm = 161.4392, GNorm = 0.1886, lr_0 = 5.3012e-04
Loss = 3.1082e-02, PNorm = 161.4802, GNorm = 0.5410, lr_0 = 5.2976e-04
Loss = 2.9915e-02, PNorm = 161.5174, GNorm = 0.2864, lr_0 = 5.2939e-04
Loss = 2.3864e-02, PNorm = 161.5532, GNorm = 0.2692, lr_0 = 5.2903e-04
Loss = 2.8687e-02, PNorm = 161.5920, GNorm = 0.4876, lr_0 = 5.2867e-04
Loss = 3.1567e-02, PNorm = 161.6327, GNorm = 0.4423, lr_0 = 5.2831e-04
Loss = 3.3383e-02, PNorm = 161.6695, GNorm = 0.3515, lr_0 = 5.2795e-04
Loss = 3.8045e-02, PNorm = 161.7111, GNorm = 0.4037, lr_0 = 5.2758e-04
Loss = 3.2314e-02, PNorm = 161.7498, GNorm = 0.4506, lr_0 = 5.2722e-04
Loss = 3.7092e-02, PNorm = 161.7871, GNorm = 0.6373, lr_0 = 5.2686e-04
Loss = 3.0284e-02, PNorm = 161.8305, GNorm = 0.2466, lr_0 = 5.2650e-04
Loss = 3.3902e-02, PNorm = 161.8757, GNorm = 0.3791, lr_0 = 5.2614e-04
Loss = 3.8458e-02, PNorm = 161.9176, GNorm = 0.6565, lr_0 = 5.2578e-04
Loss = 4.1686e-02, PNorm = 161.9566, GNorm = 0.2396, lr_0 = 5.2542e-04
Loss = 3.6216e-02, PNorm = 162.0018, GNorm = 0.5311, lr_0 = 5.2506e-04
Loss = 3.9317e-02, PNorm = 162.0500, GNorm = 0.4894, lr_0 = 5.2470e-04
Loss = 3.5374e-02, PNorm = 162.0925, GNorm = 0.3314, lr_0 = 5.2434e-04
Loss = 2.7559e-02, PNorm = 162.1389, GNorm = 0.6391, lr_0 = 5.2398e-04
Loss = 3.0804e-02, PNorm = 162.1819, GNorm = 0.3084, lr_0 = 5.2362e-04
Loss = 3.5922e-02, PNorm = 162.2251, GNorm = 0.5795, lr_0 = 5.2326e-04
Loss = 4.5407e-02, PNorm = 162.2636, GNorm = 0.8091, lr_0 = 5.2290e-04
Loss = 2.7368e-02, PNorm = 162.3029, GNorm = 0.2827, lr_0 = 5.2255e-04
Loss = 3.2349e-02, PNorm = 162.3500, GNorm = 0.2564, lr_0 = 5.2219e-04
Loss = 3.4501e-02, PNorm = 162.3979, GNorm = 0.8396, lr_0 = 5.2183e-04
Loss = 3.9992e-02, PNorm = 162.4371, GNorm = 0.5405, lr_0 = 5.2147e-04
Loss = 3.4384e-02, PNorm = 162.4802, GNorm = 0.4285, lr_0 = 5.2112e-04
Loss = 2.9714e-02, PNorm = 162.5245, GNorm = 0.2567, lr_0 = 5.2076e-04
Loss = 3.2856e-02, PNorm = 162.5647, GNorm = 0.5721, lr_0 = 5.2040e-04
Loss = 3.8755e-02, PNorm = 162.6061, GNorm = 0.2836, lr_0 = 5.2005e-04
Loss = 2.9345e-02, PNorm = 162.6483, GNorm = 0.4785, lr_0 = 5.1969e-04
Loss = 2.6479e-02, PNorm = 162.6860, GNorm = 0.2286, lr_0 = 5.1933e-04
Loss = 3.1725e-02, PNorm = 162.7246, GNorm = 0.3261, lr_0 = 5.1898e-04
Loss = 2.7887e-02, PNorm = 162.7657, GNorm = 0.6496, lr_0 = 5.1862e-04
Loss = 3.2424e-02, PNorm = 162.8072, GNorm = 0.2799, lr_0 = 5.1827e-04
Loss = 2.8830e-02, PNorm = 162.8488, GNorm = 0.2701, lr_0 = 5.1791e-04
Validation mae = 0.123090
Epoch 10
Loss = 2.8535e-02, PNorm = 162.8834, GNorm = 0.2827, lr_0 = 5.1756e-04
Loss = 2.9141e-02, PNorm = 162.9147, GNorm = 0.3458, lr_0 = 5.1720e-04
Loss = 3.0360e-02, PNorm = 162.9422, GNorm = 0.7651, lr_0 = 5.1685e-04
Loss = 2.8772e-02, PNorm = 162.9725, GNorm = 0.3557, lr_0 = 5.1649e-04
Loss = 3.9510e-02, PNorm = 163.0101, GNorm = 0.4489, lr_0 = 5.1614e-04
Loss = 3.1084e-02, PNorm = 163.0451, GNorm = 0.4928, lr_0 = 5.1579e-04
Loss = 2.8005e-02, PNorm = 163.0738, GNorm = 0.4768, lr_0 = 5.1543e-04
Loss = 2.4420e-02, PNorm = 163.1005, GNorm = 0.1903, lr_0 = 5.1508e-04
Loss = 3.0300e-02, PNorm = 163.1247, GNorm = 0.6967, lr_0 = 5.1473e-04
Loss = 2.8832e-02, PNorm = 163.1459, GNorm = 0.1740, lr_0 = 5.1437e-04
Loss = 3.0739e-02, PNorm = 163.1709, GNorm = 0.5115, lr_0 = 5.1402e-04
Loss = 2.5876e-02, PNorm = 163.2008, GNorm = 0.5431, lr_0 = 5.1367e-04
Loss = 2.8513e-02, PNorm = 163.2297, GNorm = 0.3378, lr_0 = 5.1332e-04
Loss = 2.6422e-02, PNorm = 163.2567, GNorm = 0.2907, lr_0 = 5.1297e-04
Loss = 2.8258e-02, PNorm = 163.2857, GNorm = 0.7002, lr_0 = 5.1262e-04
Loss = 3.0355e-02, PNorm = 163.3154, GNorm = 0.3447, lr_0 = 5.1226e-04
Loss = 2.5951e-02, PNorm = 163.3420, GNorm = 0.2907, lr_0 = 5.1191e-04
Loss = 2.9616e-02, PNorm = 163.3685, GNorm = 0.3402, lr_0 = 5.1156e-04
Loss = 3.5838e-02, PNorm = 163.3981, GNorm = 0.3041, lr_0 = 5.1121e-04
Loss = 2.7239e-02, PNorm = 163.4299, GNorm = 0.6952, lr_0 = 5.1086e-04
Loss = 2.4045e-02, PNorm = 163.4644, GNorm = 0.3161, lr_0 = 5.1051e-04
Loss = 2.1525e-02, PNorm = 163.4958, GNorm = 0.3146, lr_0 = 5.1016e-04
Loss = 2.8151e-02, PNorm = 163.5230, GNorm = 0.5795, lr_0 = 5.0981e-04
Loss = 2.9915e-02, PNorm = 163.5527, GNorm = 0.4169, lr_0 = 5.0946e-04
Loss = 2.6514e-02, PNorm = 163.5825, GNorm = 0.4946, lr_0 = 5.0911e-04
Loss = 3.4172e-02, PNorm = 163.6121, GNorm = 0.4116, lr_0 = 5.0877e-04
Loss = 2.2574e-02, PNorm = 163.6372, GNorm = 0.5427, lr_0 = 5.0842e-04
Loss = 2.5997e-02, PNorm = 163.6668, GNorm = 0.2789, lr_0 = 5.0807e-04
Loss = 3.3733e-02, PNorm = 163.6930, GNorm = 0.6584, lr_0 = 5.0772e-04
Loss = 2.6107e-02, PNorm = 163.7269, GNorm = 0.7730, lr_0 = 5.0737e-04
Loss = 3.0648e-02, PNorm = 163.7508, GNorm = 0.5097, lr_0 = 5.0703e-04
Loss = 2.4879e-02, PNorm = 163.7790, GNorm = 0.3773, lr_0 = 5.0668e-04
Loss = 2.2669e-02, PNorm = 163.8076, GNorm = 0.4464, lr_0 = 5.0633e-04
Loss = 2.4347e-02, PNorm = 163.8390, GNorm = 0.3567, lr_0 = 5.0598e-04
Loss = 2.4527e-02, PNorm = 163.8722, GNorm = 0.6026, lr_0 = 5.0564e-04
Loss = 2.6726e-02, PNorm = 163.8993, GNorm = 0.4165, lr_0 = 5.0529e-04
Loss = 3.3648e-02, PNorm = 163.9318, GNorm = 0.4178, lr_0 = 5.0494e-04
Loss = 2.4038e-02, PNorm = 163.9599, GNorm = 0.2965, lr_0 = 5.0460e-04
Loss = 2.8594e-02, PNorm = 163.9886, GNorm = 0.2476, lr_0 = 5.0425e-04
Loss = 2.5441e-02, PNorm = 164.0182, GNorm = 0.3014, lr_0 = 5.0391e-04
Loss = 2.3597e-02, PNorm = 164.0487, GNorm = 0.4148, lr_0 = 5.0356e-04
Loss = 2.7358e-02, PNorm = 164.0743, GNorm = 0.5508, lr_0 = 5.0322e-04
Loss = 2.7559e-02, PNorm = 164.1037, GNorm = 0.4681, lr_0 = 5.0287e-04
Loss = 3.0626e-02, PNorm = 164.1407, GNorm = 0.3339, lr_0 = 5.0253e-04
Loss = 3.1959e-02, PNorm = 164.1682, GNorm = 0.5346, lr_0 = 5.0218e-04
Loss = 2.3951e-02, PNorm = 164.2023, GNorm = 0.3812, lr_0 = 5.0184e-04
Loss = 2.8385e-02, PNorm = 164.2326, GNorm = 0.2627, lr_0 = 5.0150e-04
Loss = 2.5722e-02, PNorm = 164.2647, GNorm = 0.3076, lr_0 = 5.0115e-04
Loss = 2.6591e-02, PNorm = 164.2888, GNorm = 1.1025, lr_0 = 5.0081e-04
Loss = 2.8166e-02, PNorm = 164.3164, GNorm = 0.6357, lr_0 = 5.0047e-04
Loss = 2.9402e-02, PNorm = 164.3526, GNorm = 0.4554, lr_0 = 5.0012e-04
Loss = 2.9473e-02, PNorm = 164.3879, GNorm = 0.6511, lr_0 = 4.9978e-04
Loss = 4.3674e-02, PNorm = 164.4187, GNorm = 0.6938, lr_0 = 4.9944e-04
Loss = 3.0457e-02, PNorm = 164.4570, GNorm = 0.2926, lr_0 = 4.9910e-04
Loss = 2.9965e-02, PNorm = 164.4912, GNorm = 0.4303, lr_0 = 4.9875e-04
Loss = 2.8201e-02, PNorm = 164.5266, GNorm = 0.2672, lr_0 = 4.9841e-04
Loss = 2.5943e-02, PNorm = 164.5611, GNorm = 0.3474, lr_0 = 4.9807e-04
Loss = 2.4754e-02, PNorm = 164.5929, GNorm = 0.5205, lr_0 = 4.9773e-04
Loss = 3.1004e-02, PNorm = 164.6293, GNorm = 0.3742, lr_0 = 4.9739e-04
Loss = 2.4416e-02, PNorm = 164.6656, GNorm = 0.1833, lr_0 = 4.9705e-04
Loss = 2.8840e-02, PNorm = 164.6996, GNorm = 0.2303, lr_0 = 4.9671e-04
Loss = 2.5778e-02, PNorm = 164.7294, GNorm = 0.3057, lr_0 = 4.9637e-04
Loss = 2.2146e-02, PNorm = 164.7624, GNorm = 0.3306, lr_0 = 4.9603e-04
Loss = 2.1933e-02, PNorm = 164.7965, GNorm = 0.5278, lr_0 = 4.9569e-04
Loss = 2.9876e-02, PNorm = 164.8308, GNorm = 0.2880, lr_0 = 4.9535e-04
Loss = 2.8067e-02, PNorm = 164.8654, GNorm = 0.3058, lr_0 = 4.9501e-04
Loss = 2.1276e-02, PNorm = 164.8958, GNorm = 0.2513, lr_0 = 4.9467e-04
Loss = 2.5540e-02, PNorm = 164.9266, GNorm = 0.2639, lr_0 = 4.9433e-04
Loss = 2.6723e-02, PNorm = 164.9575, GNorm = 0.3301, lr_0 = 4.9399e-04
Loss = 2.5693e-02, PNorm = 164.9877, GNorm = 0.5677, lr_0 = 4.9365e-04
Loss = 3.0564e-02, PNorm = 165.0220, GNorm = 0.2959, lr_0 = 4.9332e-04
Loss = 2.9651e-02, PNorm = 165.0535, GNorm = 0.6956, lr_0 = 4.9298e-04
Loss = 3.9387e-02, PNorm = 165.0894, GNorm = 0.2876, lr_0 = 4.9264e-04
Loss = 2.4928e-02, PNorm = 165.1268, GNorm = 0.3455, lr_0 = 4.9230e-04
Loss = 2.5149e-02, PNorm = 165.1605, GNorm = 0.3416, lr_0 = 4.9197e-04
Loss = 2.6282e-02, PNorm = 165.1938, GNorm = 0.4190, lr_0 = 4.9163e-04
Loss = 2.8456e-02, PNorm = 165.2291, GNorm = 0.2879, lr_0 = 4.9129e-04
Loss = 3.8317e-02, PNorm = 165.2634, GNorm = 0.4637, lr_0 = 4.9095e-04
Loss = 2.2666e-02, PNorm = 165.2942, GNorm = 0.3642, lr_0 = 4.9062e-04
Loss = 2.9080e-02, PNorm = 165.3262, GNorm = 0.3348, lr_0 = 4.9028e-04
Loss = 2.6187e-02, PNorm = 165.3561, GNorm = 0.5364, lr_0 = 4.8995e-04
Loss = 2.6520e-02, PNorm = 165.3908, GNorm = 0.3612, lr_0 = 4.8961e-04
Loss = 2.6696e-02, PNorm = 165.4236, GNorm = 0.2792, lr_0 = 4.8928e-04
Loss = 4.0347e-02, PNorm = 165.4591, GNorm = 0.4614, lr_0 = 4.8894e-04
Loss = 2.7360e-02, PNorm = 165.4955, GNorm = 0.2565, lr_0 = 4.8861e-04
Loss = 2.3519e-02, PNorm = 165.5306, GNorm = 0.5093, lr_0 = 4.8827e-04
Loss = 2.6797e-02, PNorm = 165.5621, GNorm = 0.4203, lr_0 = 4.8794e-04
Loss = 2.9632e-02, PNorm = 165.5970, GNorm = 0.5110, lr_0 = 4.8760e-04
Loss = 2.1569e-02, PNorm = 165.6287, GNorm = 0.2570, lr_0 = 4.8727e-04
Loss = 2.6206e-02, PNorm = 165.6586, GNorm = 0.4524, lr_0 = 4.8693e-04
Loss = 2.6063e-02, PNorm = 165.6890, GNorm = 0.4832, lr_0 = 4.8660e-04
Loss = 2.3932e-02, PNorm = 165.7242, GNorm = 0.5302, lr_0 = 4.8627e-04
Loss = 2.5772e-02, PNorm = 165.7553, GNorm = 0.2648, lr_0 = 4.8593e-04
Loss = 2.8936e-02, PNorm = 165.7859, GNorm = 0.4322, lr_0 = 4.8560e-04
Loss = 2.9650e-02, PNorm = 165.8246, GNorm = 0.2452, lr_0 = 4.8527e-04
Loss = 2.3371e-02, PNorm = 165.8613, GNorm = 0.3352, lr_0 = 4.8494e-04
Loss = 2.4932e-02, PNorm = 165.8978, GNorm = 0.5528, lr_0 = 4.8460e-04
Loss = 2.8843e-02, PNorm = 165.9289, GNorm = 0.4901, lr_0 = 4.8427e-04
Loss = 2.2327e-02, PNorm = 165.9567, GNorm = 0.3803, lr_0 = 4.8394e-04
Loss = 2.5042e-02, PNorm = 165.9880, GNorm = 0.3254, lr_0 = 4.8361e-04
Loss = 2.4244e-02, PNorm = 166.0225, GNorm = 0.3016, lr_0 = 4.8328e-04
Loss = 2.2781e-02, PNorm = 166.0591, GNorm = 0.4137, lr_0 = 4.8295e-04
Loss = 2.4696e-02, PNorm = 166.0900, GNorm = 0.2709, lr_0 = 4.8262e-04
Loss = 2.3955e-02, PNorm = 166.1210, GNorm = 0.5607, lr_0 = 4.8228e-04
Loss = 2.3104e-02, PNorm = 166.1500, GNorm = 0.4164, lr_0 = 4.8195e-04
Loss = 2.7034e-02, PNorm = 166.1801, GNorm = 0.6851, lr_0 = 4.8162e-04
Loss = 2.8006e-02, PNorm = 166.2144, GNorm = 0.2674, lr_0 = 4.8129e-04
Loss = 3.0135e-02, PNorm = 166.2533, GNorm = 0.3951, lr_0 = 4.8096e-04
Loss = 2.5381e-02, PNorm = 166.2862, GNorm = 0.5425, lr_0 = 4.8064e-04
Loss = 2.8658e-02, PNorm = 166.3142, GNorm = 0.7458, lr_0 = 4.8031e-04
Loss = 2.8728e-02, PNorm = 166.3444, GNorm = 0.3303, lr_0 = 4.7998e-04
Loss = 3.2344e-02, PNorm = 166.3792, GNorm = 0.2844, lr_0 = 4.7965e-04
Loss = 3.3009e-02, PNorm = 166.4115, GNorm = 0.4146, lr_0 = 4.7932e-04
Loss = 3.4779e-02, PNorm = 166.4503, GNorm = 0.5392, lr_0 = 4.7899e-04
Loss = 4.4402e-02, PNorm = 166.4948, GNorm = 0.4760, lr_0 = 4.7866e-04
Loss = 2.8113e-02, PNorm = 166.5409, GNorm = 0.4131, lr_0 = 4.7833e-04
Loss = 3.2031e-02, PNorm = 166.5830, GNorm = 0.2657, lr_0 = 4.7801e-04
Loss = 3.8260e-02, PNorm = 166.6238, GNorm = 1.4819, lr_0 = 4.7768e-04
Loss = 3.4087e-02, PNorm = 166.6676, GNorm = 0.3295, lr_0 = 4.7735e-04
Loss = 2.3263e-02, PNorm = 166.7048, GNorm = 0.2072, lr_0 = 4.7703e-04
Validation mae = 0.121779
Epoch 11
Loss = 2.3693e-02, PNorm = 166.7331, GNorm = 0.3866, lr_0 = 4.7670e-04
Loss = 2.5313e-02, PNorm = 166.7609, GNorm = 0.3166, lr_0 = 4.7637e-04
Loss = 2.5780e-02, PNorm = 166.7848, GNorm = 0.3924, lr_0 = 4.7605e-04
Loss = 2.1858e-02, PNorm = 166.8057, GNorm = 0.8391, lr_0 = 4.7572e-04
Loss = 2.2869e-02, PNorm = 166.8298, GNorm = 0.2972, lr_0 = 4.7539e-04
Loss = 1.8611e-02, PNorm = 166.8521, GNorm = 0.2645, lr_0 = 4.7507e-04
Loss = 1.9568e-02, PNorm = 166.8739, GNorm = 0.3262, lr_0 = 4.7474e-04
Loss = 1.9728e-02, PNorm = 166.8979, GNorm = 0.3050, lr_0 = 4.7442e-04
Loss = 2.4106e-02, PNorm = 166.9232, GNorm = 0.3342, lr_0 = 4.7409e-04
Loss = 1.9472e-02, PNorm = 166.9457, GNorm = 0.3806, lr_0 = 4.7377e-04
Loss = 2.3700e-02, PNorm = 166.9729, GNorm = 0.2343, lr_0 = 4.7344e-04
Loss = 3.6020e-02, PNorm = 166.9964, GNorm = 0.2717, lr_0 = 4.7312e-04
Loss = 1.9923e-02, PNorm = 167.0192, GNorm = 0.3081, lr_0 = 4.7279e-04
Loss = 2.1490e-02, PNorm = 167.0411, GNorm = 0.8074, lr_0 = 4.7247e-04
Loss = 2.3473e-02, PNorm = 167.0640, GNorm = 0.2627, lr_0 = 4.7215e-04
Loss = 3.4083e-02, PNorm = 167.0854, GNorm = 0.8531, lr_0 = 4.7182e-04
Loss = 2.1783e-02, PNorm = 167.1108, GNorm = 0.3711, lr_0 = 4.7150e-04
Loss = 3.2001e-02, PNorm = 167.1345, GNorm = 0.5498, lr_0 = 4.7118e-04
Loss = 2.4661e-02, PNorm = 167.1627, GNorm = 0.2735, lr_0 = 4.7085e-04
Loss = 3.0682e-02, PNorm = 167.1871, GNorm = 0.2982, lr_0 = 4.7053e-04
Loss = 2.6848e-02, PNorm = 167.2111, GNorm = 0.4353, lr_0 = 4.7021e-04
Loss = 2.1553e-02, PNorm = 167.2417, GNorm = 0.2451, lr_0 = 4.6989e-04
Loss = 2.1507e-02, PNorm = 167.2695, GNorm = 0.2063, lr_0 = 4.6957e-04
Loss = 2.3000e-02, PNorm = 167.2982, GNorm = 0.1990, lr_0 = 4.6924e-04
Loss = 2.1703e-02, PNorm = 167.3271, GNorm = 0.4679, lr_0 = 4.6892e-04
Loss = 2.5125e-02, PNorm = 167.3542, GNorm = 0.2055, lr_0 = 4.6860e-04
Loss = 2.2658e-02, PNorm = 167.3802, GNorm = 0.6655, lr_0 = 4.6828e-04
Loss = 2.5928e-02, PNorm = 167.4072, GNorm = 0.2408, lr_0 = 4.6796e-04
Loss = 2.0978e-02, PNorm = 167.4331, GNorm = 0.4231, lr_0 = 4.6764e-04
Loss = 1.9919e-02, PNorm = 167.4614, GNorm = 0.3551, lr_0 = 4.6732e-04
Loss = 2.1980e-02, PNorm = 167.4896, GNorm = 0.2244, lr_0 = 4.6700e-04
Loss = 2.5977e-02, PNorm = 167.5156, GNorm = 0.3384, lr_0 = 4.6668e-04
Loss = 2.6053e-02, PNorm = 167.5428, GNorm = 0.2711, lr_0 = 4.6636e-04
Loss = 2.5380e-02, PNorm = 167.5672, GNorm = 0.2123, lr_0 = 4.6604e-04
Loss = 2.4220e-02, PNorm = 167.5919, GNorm = 0.4350, lr_0 = 4.6572e-04
Loss = 2.6018e-02, PNorm = 167.6222, GNorm = 0.1677, lr_0 = 4.6540e-04
Loss = 2.4872e-02, PNorm = 167.6548, GNorm = 0.2127, lr_0 = 4.6508e-04
Loss = 2.5712e-02, PNorm = 167.6800, GNorm = 0.2540, lr_0 = 4.6476e-04
Loss = 2.7616e-02, PNorm = 167.7092, GNorm = 0.3475, lr_0 = 4.6445e-04
Loss = 2.1869e-02, PNorm = 167.7379, GNorm = 0.3124, lr_0 = 4.6413e-04
Loss = 3.0881e-02, PNorm = 167.7687, GNorm = 0.4172, lr_0 = 4.6381e-04
Loss = 2.7465e-02, PNorm = 167.7886, GNorm = 0.3631, lr_0 = 4.6349e-04
Loss = 3.0332e-02, PNorm = 167.8163, GNorm = 0.5282, lr_0 = 4.6317e-04
Loss = 2.2327e-02, PNorm = 167.8395, GNorm = 0.7351, lr_0 = 4.6286e-04
Loss = 1.8862e-02, PNorm = 167.8630, GNorm = 0.1917, lr_0 = 4.6254e-04
Loss = 2.1242e-02, PNorm = 167.8900, GNorm = 0.2401, lr_0 = 4.6222e-04
Loss = 1.8030e-02, PNorm = 167.9155, GNorm = 0.3911, lr_0 = 4.6191e-04
Loss = 2.8513e-02, PNorm = 167.9379, GNorm = 0.9412, lr_0 = 4.6159e-04
Loss = 2.6829e-02, PNorm = 167.9565, GNorm = 0.2983, lr_0 = 4.6127e-04
Loss = 2.2703e-02, PNorm = 167.9842, GNorm = 0.5237, lr_0 = 4.6096e-04
Loss = 2.0030e-02, PNorm = 168.0114, GNorm = 0.3915, lr_0 = 4.6064e-04
Loss = 1.9663e-02, PNorm = 168.0398, GNorm = 0.2729, lr_0 = 4.6033e-04
Loss = 2.0534e-02, PNorm = 168.0677, GNorm = 0.4106, lr_0 = 4.6001e-04
Loss = 2.3889e-02, PNorm = 168.0947, GNorm = 0.5239, lr_0 = 4.5970e-04
Loss = 2.3581e-02, PNorm = 168.1227, GNorm = 0.2593, lr_0 = 4.5938e-04
Loss = 2.7185e-02, PNorm = 168.1421, GNorm = 0.4477, lr_0 = 4.5907e-04
Loss = 2.3438e-02, PNorm = 168.1705, GNorm = 0.2530, lr_0 = 4.5875e-04
Loss = 2.2407e-02, PNorm = 168.1969, GNorm = 1.1427, lr_0 = 4.5844e-04
Loss = 2.1505e-02, PNorm = 168.2191, GNorm = 0.5556, lr_0 = 4.5812e-04
Loss = 2.7342e-02, PNorm = 168.2447, GNorm = 0.5869, lr_0 = 4.5781e-04
Loss = 2.2853e-02, PNorm = 168.2688, GNorm = 0.5952, lr_0 = 4.5750e-04
Loss = 2.1878e-02, PNorm = 168.2986, GNorm = 0.3184, lr_0 = 4.5718e-04
Loss = 2.5095e-02, PNorm = 168.3260, GNorm = 0.2550, lr_0 = 4.5687e-04
Loss = 1.7384e-02, PNorm = 168.3552, GNorm = 0.1978, lr_0 = 4.5656e-04
Loss = 2.4952e-02, PNorm = 168.3882, GNorm = 0.4295, lr_0 = 4.5624e-04
Loss = 2.3725e-02, PNorm = 168.4181, GNorm = 0.8133, lr_0 = 4.5593e-04
Loss = 2.0943e-02, PNorm = 168.4471, GNorm = 0.5559, lr_0 = 4.5562e-04
Loss = 2.4594e-02, PNorm = 168.4740, GNorm = 0.3405, lr_0 = 4.5531e-04
Loss = 2.2792e-02, PNorm = 168.4977, GNorm = 0.5529, lr_0 = 4.5499e-04
Loss = 1.9611e-02, PNorm = 168.5256, GNorm = 0.2768, lr_0 = 4.5468e-04
Loss = 2.4925e-02, PNorm = 168.5535, GNorm = 0.7094, lr_0 = 4.5437e-04
Loss = 2.8019e-02, PNorm = 168.5884, GNorm = 0.4956, lr_0 = 4.5406e-04
Loss = 2.2672e-02, PNorm = 168.6187, GNorm = 0.2566, lr_0 = 4.5375e-04
Loss = 2.2206e-02, PNorm = 168.6467, GNorm = 0.1801, lr_0 = 4.5344e-04
Loss = 2.7853e-02, PNorm = 168.6692, GNorm = 0.5777, lr_0 = 4.5313e-04
Loss = 1.9504e-02, PNorm = 168.6953, GNorm = 0.3074, lr_0 = 4.5282e-04
Loss = 1.9313e-02, PNorm = 168.7212, GNorm = 0.2309, lr_0 = 4.5251e-04
Loss = 2.3018e-02, PNorm = 168.7495, GNorm = 0.8449, lr_0 = 4.5220e-04
Loss = 2.0071e-02, PNorm = 168.7799, GNorm = 0.3778, lr_0 = 4.5189e-04
Loss = 1.9026e-02, PNorm = 168.8094, GNorm = 0.3067, lr_0 = 4.5158e-04
Loss = 2.2648e-02, PNorm = 168.8354, GNorm = 0.9314, lr_0 = 4.5127e-04
Loss = 2.1765e-02, PNorm = 168.8629, GNorm = 0.1913, lr_0 = 4.5096e-04
Loss = 2.1418e-02, PNorm = 168.8881, GNorm = 0.4992, lr_0 = 4.5065e-04
Loss = 2.0103e-02, PNorm = 168.9143, GNorm = 0.3719, lr_0 = 4.5034e-04
Loss = 2.6392e-02, PNorm = 168.9390, GNorm = 0.3881, lr_0 = 4.5003e-04
Loss = 1.8787e-02, PNorm = 168.9666, GNorm = 0.3759, lr_0 = 4.4972e-04
Loss = 2.2701e-02, PNorm = 168.9928, GNorm = 0.5275, lr_0 = 4.4942e-04
Loss = 2.1451e-02, PNorm = 169.0176, GNorm = 0.4096, lr_0 = 4.4911e-04
Loss = 2.2932e-02, PNorm = 169.0467, GNorm = 0.4238, lr_0 = 4.4880e-04
Loss = 2.1623e-02, PNorm = 169.0742, GNorm = 0.2919, lr_0 = 4.4849e-04
Loss = 2.4130e-02, PNorm = 169.1036, GNorm = 0.3125, lr_0 = 4.4819e-04
Loss = 2.2754e-02, PNorm = 169.1357, GNorm = 0.3965, lr_0 = 4.4788e-04
Loss = 2.8528e-02, PNorm = 169.1623, GNorm = 0.5121, lr_0 = 4.4757e-04
Loss = 2.6643e-02, PNorm = 169.1946, GNorm = 0.3434, lr_0 = 4.4727e-04
Loss = 2.0624e-02, PNorm = 169.2274, GNorm = 0.4146, lr_0 = 4.4696e-04
Loss = 2.4795e-02, PNorm = 169.2569, GNorm = 0.2563, lr_0 = 4.4665e-04
Loss = 2.2202e-02, PNorm = 169.2894, GNorm = 0.4169, lr_0 = 4.4635e-04
Loss = 2.1550e-02, PNorm = 169.3220, GNorm = 0.2836, lr_0 = 4.4604e-04
Loss = 2.4932e-02, PNorm = 169.3500, GNorm = 0.2556, lr_0 = 4.4574e-04
Loss = 2.9134e-02, PNorm = 169.3758, GNorm = 0.5661, lr_0 = 4.4543e-04
Loss = 2.3223e-02, PNorm = 169.4081, GNorm = 0.2802, lr_0 = 4.4513e-04
Loss = 2.8575e-02, PNorm = 169.4372, GNorm = 0.5594, lr_0 = 4.4482e-04
Loss = 2.2177e-02, PNorm = 169.4694, GNorm = 0.1689, lr_0 = 4.4452e-04
Loss = 2.4647e-02, PNorm = 169.5027, GNorm = 0.6038, lr_0 = 4.4421e-04
Loss = 2.0767e-02, PNorm = 169.5303, GNorm = 0.3579, lr_0 = 4.4391e-04
Loss = 2.5006e-02, PNorm = 169.5578, GNorm = 0.3412, lr_0 = 4.4360e-04
Loss = 2.0648e-02, PNorm = 169.5893, GNorm = 0.5385, lr_0 = 4.4330e-04
Loss = 1.9173e-02, PNorm = 169.6194, GNorm = 0.3898, lr_0 = 4.4299e-04
Loss = 2.0143e-02, PNorm = 169.6434, GNorm = 0.2823, lr_0 = 4.4269e-04
Loss = 1.8039e-02, PNorm = 169.6699, GNorm = 0.2557, lr_0 = 4.4239e-04
Loss = 2.7893e-02, PNorm = 169.6963, GNorm = 0.3480, lr_0 = 4.4209e-04
Loss = 2.1463e-02, PNorm = 169.7249, GNorm = 0.4521, lr_0 = 4.4178e-04
Loss = 3.5147e-02, PNorm = 169.7599, GNorm = 0.6920, lr_0 = 4.4148e-04
Loss = 1.9936e-02, PNorm = 169.7896, GNorm = 0.4264, lr_0 = 4.4118e-04
Loss = 2.4988e-02, PNorm = 169.8149, GNorm = 0.5185, lr_0 = 4.4088e-04
Loss = 1.9379e-02, PNorm = 169.8415, GNorm = 0.1634, lr_0 = 4.4057e-04
Loss = 2.8283e-02, PNorm = 169.8736, GNorm = 0.1943, lr_0 = 4.4027e-04
Loss = 2.0423e-02, PNorm = 169.9024, GNorm = 0.7961, lr_0 = 4.3997e-04
Loss = 2.2072e-02, PNorm = 169.9310, GNorm = 0.4957, lr_0 = 4.3967e-04
Loss = 2.4908e-02, PNorm = 169.9605, GNorm = 0.5919, lr_0 = 4.3937e-04
Validation mae = 0.122108
Epoch 12
Loss = 2.3309e-02, PNorm = 169.9868, GNorm = 0.5066, lr_0 = 4.3907e-04
Loss = 2.3014e-02, PNorm = 170.0142, GNorm = 0.6916, lr_0 = 4.3877e-04
Loss = 2.0802e-02, PNorm = 170.0364, GNorm = 0.3113, lr_0 = 4.3846e-04
Loss = 2.1116e-02, PNorm = 170.0594, GNorm = 0.3479, lr_0 = 4.3816e-04
Loss = 2.4110e-02, PNorm = 170.0789, GNorm = 0.1867, lr_0 = 4.3786e-04
Loss = 2.0995e-02, PNorm = 170.1004, GNorm = 0.8381, lr_0 = 4.3756e-04
Loss = 1.7821e-02, PNorm = 170.1195, GNorm = 0.3010, lr_0 = 4.3726e-04
Loss = 1.9822e-02, PNorm = 170.1397, GNorm = 0.2719, lr_0 = 4.3696e-04
Loss = 1.7043e-02, PNorm = 170.1663, GNorm = 0.3890, lr_0 = 4.3667e-04
Loss = 2.2769e-02, PNorm = 170.1893, GNorm = 0.3891, lr_0 = 4.3637e-04
Loss = 2.4500e-02, PNorm = 170.2053, GNorm = 0.2112, lr_0 = 4.3607e-04
Loss = 1.6628e-02, PNorm = 170.2285, GNorm = 0.2756, lr_0 = 4.3577e-04
Loss = 1.7760e-02, PNorm = 170.2466, GNorm = 0.2863, lr_0 = 4.3547e-04
Loss = 1.8708e-02, PNorm = 170.2648, GNorm = 0.4135, lr_0 = 4.3517e-04
Loss = 2.0246e-02, PNorm = 170.2858, GNorm = 0.3273, lr_0 = 4.3487e-04
Loss = 2.2055e-02, PNorm = 170.3112, GNorm = 0.3048, lr_0 = 4.3458e-04
Loss = 2.6235e-02, PNorm = 170.3298, GNorm = 0.4841, lr_0 = 4.3428e-04
Loss = 1.8268e-02, PNorm = 170.3493, GNorm = 0.2905, lr_0 = 4.3398e-04
Loss = 2.0613e-02, PNorm = 170.3715, GNorm = 0.8305, lr_0 = 4.3368e-04
Loss = 1.7357e-02, PNorm = 170.3954, GNorm = 0.5281, lr_0 = 4.3339e-04
Loss = 1.7621e-02, PNorm = 170.4141, GNorm = 0.3543, lr_0 = 4.3309e-04
Loss = 1.8090e-02, PNorm = 170.4330, GNorm = 0.3121, lr_0 = 4.3279e-04
Loss = 1.7754e-02, PNorm = 170.4543, GNorm = 0.4248, lr_0 = 4.3250e-04
Loss = 1.7706e-02, PNorm = 170.4772, GNorm = 0.2821, lr_0 = 4.3220e-04
Loss = 1.7256e-02, PNorm = 170.4949, GNorm = 0.4736, lr_0 = 4.3190e-04
Loss = 2.2506e-02, PNorm = 170.5153, GNorm = 0.3974, lr_0 = 4.3161e-04
Loss = 1.5750e-02, PNorm = 170.5324, GNorm = 0.1664, lr_0 = 4.3131e-04
Loss = 1.6729e-02, PNorm = 170.5539, GNorm = 0.2376, lr_0 = 4.3102e-04
Loss = 1.9262e-02, PNorm = 170.5770, GNorm = 0.6084, lr_0 = 4.3072e-04
Loss = 1.6757e-02, PNorm = 170.5987, GNorm = 0.3959, lr_0 = 4.3043e-04
Loss = 1.6368e-02, PNorm = 170.6174, GNorm = 0.1958, lr_0 = 4.3013e-04
Loss = 2.2711e-02, PNorm = 170.6346, GNorm = 0.3128, lr_0 = 4.2984e-04
Loss = 1.9835e-02, PNorm = 170.6517, GNorm = 0.2635, lr_0 = 4.2954e-04
Loss = 1.4671e-02, PNorm = 170.6717, GNorm = 0.3549, lr_0 = 4.2925e-04
Loss = 1.6877e-02, PNorm = 170.6925, GNorm = 0.1723, lr_0 = 4.2895e-04
Loss = 1.9768e-02, PNorm = 170.7153, GNorm = 0.3635, lr_0 = 4.2866e-04
Loss = 2.0856e-02, PNorm = 170.7345, GNorm = 0.2287, lr_0 = 4.2837e-04
Loss = 2.0189e-02, PNorm = 170.7573, GNorm = 0.6189, lr_0 = 4.2807e-04
Loss = 2.0370e-02, PNorm = 170.7805, GNorm = 0.2623, lr_0 = 4.2778e-04
Loss = 1.6897e-02, PNorm = 170.7990, GNorm = 0.5482, lr_0 = 4.2749e-04
Loss = 1.6913e-02, PNorm = 170.8174, GNorm = 0.1688, lr_0 = 4.2719e-04
Loss = 2.1521e-02, PNorm = 170.8375, GNorm = 0.3036, lr_0 = 4.2690e-04
Loss = 1.7013e-02, PNorm = 170.8597, GNorm = 0.4185, lr_0 = 4.2661e-04
Loss = 2.1450e-02, PNorm = 170.8802, GNorm = 0.2594, lr_0 = 4.2632e-04
Loss = 1.7478e-02, PNorm = 170.9017, GNorm = 0.2741, lr_0 = 4.2602e-04
Loss = 2.2039e-02, PNorm = 170.9267, GNorm = 0.5890, lr_0 = 4.2573e-04
Loss = 2.0728e-02, PNorm = 170.9523, GNorm = 0.4027, lr_0 = 4.2544e-04
Loss = 1.6915e-02, PNorm = 170.9768, GNorm = 0.5236, lr_0 = 4.2515e-04
Loss = 2.0861e-02, PNorm = 171.0020, GNorm = 0.1939, lr_0 = 4.2486e-04
Loss = 1.9501e-02, PNorm = 171.0253, GNorm = 0.3036, lr_0 = 4.2457e-04
Loss = 1.7917e-02, PNorm = 171.0486, GNorm = 0.2139, lr_0 = 4.2428e-04
Loss = 1.8577e-02, PNorm = 171.0714, GNorm = 0.1915, lr_0 = 4.2399e-04
Loss = 2.2333e-02, PNorm = 171.0974, GNorm = 0.1929, lr_0 = 4.2370e-04
Loss = 1.6265e-02, PNorm = 171.1215, GNorm = 0.5723, lr_0 = 4.2340e-04
Loss = 1.9138e-02, PNorm = 171.1452, GNorm = 0.8022, lr_0 = 4.2311e-04
Loss = 2.1846e-02, PNorm = 171.1700, GNorm = 0.2238, lr_0 = 4.2283e-04
Loss = 2.1338e-02, PNorm = 171.1901, GNorm = 0.3338, lr_0 = 4.2254e-04
Loss = 1.8205e-02, PNorm = 171.2131, GNorm = 0.2596, lr_0 = 4.2225e-04
Loss = 1.6209e-02, PNorm = 171.2336, GNorm = 0.2030, lr_0 = 4.2196e-04
Loss = 1.7315e-02, PNorm = 171.2539, GNorm = 0.4811, lr_0 = 4.2167e-04
Loss = 1.9714e-02, PNorm = 171.2738, GNorm = 0.5861, lr_0 = 4.2138e-04
Loss = 1.8862e-02, PNorm = 171.2972, GNorm = 0.2690, lr_0 = 4.2109e-04
Loss = 1.7147e-02, PNorm = 171.3210, GNorm = 0.4145, lr_0 = 4.2080e-04
Loss = 2.0901e-02, PNorm = 171.3451, GNorm = 0.2152, lr_0 = 4.2051e-04
Loss = 1.6171e-02, PNorm = 171.3697, GNorm = 0.2143, lr_0 = 4.2023e-04
Loss = 1.7889e-02, PNorm = 171.3919, GNorm = 0.5688, lr_0 = 4.1994e-04
Loss = 1.7655e-02, PNorm = 171.4153, GNorm = 0.2106, lr_0 = 4.1965e-04
Loss = 1.9579e-02, PNorm = 171.4375, GNorm = 0.2801, lr_0 = 4.1936e-04
Loss = 2.3273e-02, PNorm = 171.4615, GNorm = 0.3407, lr_0 = 4.1907e-04
Loss = 2.3519e-02, PNorm = 171.4869, GNorm = 0.3068, lr_0 = 4.1879e-04
Loss = 2.1938e-02, PNorm = 171.5089, GNorm = 0.2296, lr_0 = 4.1850e-04
Loss = 2.3075e-02, PNorm = 171.5309, GNorm = 0.3541, lr_0 = 4.1821e-04
Loss = 1.8547e-02, PNorm = 171.5580, GNorm = 0.2684, lr_0 = 4.1793e-04
Loss = 2.3293e-02, PNorm = 171.5825, GNorm = 0.2281, lr_0 = 4.1764e-04
Loss = 1.7314e-02, PNorm = 171.6072, GNorm = 0.3438, lr_0 = 4.1736e-04
Loss = 1.7651e-02, PNorm = 171.6289, GNorm = 0.2853, lr_0 = 4.1707e-04
Loss = 2.0542e-02, PNorm = 171.6479, GNorm = 0.3547, lr_0 = 4.1678e-04
Loss = 2.4975e-02, PNorm = 171.6720, GNorm = 0.3599, lr_0 = 4.1650e-04
Loss = 2.4058e-02, PNorm = 171.6986, GNorm = 0.3864, lr_0 = 4.1621e-04
Loss = 1.4740e-02, PNorm = 171.7227, GNorm = 0.5632, lr_0 = 4.1593e-04
Loss = 1.7935e-02, PNorm = 171.7456, GNorm = 0.3778, lr_0 = 4.1564e-04
Loss = 2.1341e-02, PNorm = 171.7656, GNorm = 0.4740, lr_0 = 4.1536e-04
Loss = 1.4992e-02, PNorm = 171.7877, GNorm = 0.2959, lr_0 = 4.1507e-04
Loss = 2.3923e-02, PNorm = 171.8117, GNorm = 0.3742, lr_0 = 4.1479e-04
Loss = 1.7727e-02, PNorm = 171.8398, GNorm = 0.6806, lr_0 = 4.1450e-04
Loss = 2.5687e-02, PNorm = 171.8622, GNorm = 0.2402, lr_0 = 4.1422e-04
Loss = 2.3907e-02, PNorm = 171.8873, GNorm = 0.2646, lr_0 = 4.1394e-04
Loss = 1.9997e-02, PNorm = 171.9112, GNorm = 0.2331, lr_0 = 4.1365e-04
Loss = 1.9866e-02, PNorm = 171.9341, GNorm = 0.1617, lr_0 = 4.1337e-04
Loss = 2.4469e-02, PNorm = 171.9596, GNorm = 0.4517, lr_0 = 4.1309e-04
Loss = 2.0182e-02, PNorm = 171.9852, GNorm = 0.2902, lr_0 = 4.1280e-04
Loss = 1.4849e-02, PNorm = 172.0073, GNorm = 0.4077, lr_0 = 4.1252e-04
Loss = 1.8312e-02, PNorm = 172.0338, GNorm = 0.2360, lr_0 = 4.1224e-04
Loss = 1.9644e-02, PNorm = 172.0587, GNorm = 0.3278, lr_0 = 4.1196e-04
Loss = 1.5986e-02, PNorm = 172.0830, GNorm = 0.3240, lr_0 = 4.1167e-04
Loss = 1.8700e-02, PNorm = 172.1083, GNorm = 0.1810, lr_0 = 4.1139e-04
Loss = 2.6044e-02, PNorm = 172.1311, GNorm = 0.3820, lr_0 = 4.1111e-04
Loss = 1.9690e-02, PNorm = 172.1550, GNorm = 0.3959, lr_0 = 4.1083e-04
Loss = 1.8401e-02, PNorm = 172.1785, GNorm = 0.4338, lr_0 = 4.1055e-04
Loss = 1.7227e-02, PNorm = 172.1959, GNorm = 0.2753, lr_0 = 4.1027e-04
Loss = 1.8938e-02, PNorm = 172.2185, GNorm = 0.1707, lr_0 = 4.0998e-04
Loss = 1.7173e-02, PNorm = 172.2438, GNorm = 0.2390, lr_0 = 4.0970e-04
Loss = 2.1262e-02, PNorm = 172.2670, GNorm = 1.2190, lr_0 = 4.0942e-04
Loss = 1.7573e-02, PNorm = 172.2880, GNorm = 0.1709, lr_0 = 4.0914e-04
Loss = 2.1401e-02, PNorm = 172.3096, GNorm = 0.2006, lr_0 = 4.0886e-04
Loss = 1.7707e-02, PNorm = 172.3304, GNorm = 0.3421, lr_0 = 4.0858e-04
Loss = 1.9268e-02, PNorm = 172.3483, GNorm = 0.1976, lr_0 = 4.0830e-04
Loss = 1.6679e-02, PNorm = 172.3693, GNorm = 0.3904, lr_0 = 4.0802e-04
Loss = 2.0642e-02, PNorm = 172.3884, GNorm = 0.1660, lr_0 = 4.0774e-04
Loss = 1.7349e-02, PNorm = 172.4105, GNorm = 0.1612, lr_0 = 4.0746e-04
Loss = 2.4604e-02, PNorm = 172.4323, GNorm = 0.2311, lr_0 = 4.0718e-04
Loss = 2.0669e-02, PNorm = 172.4583, GNorm = 0.4909, lr_0 = 4.0691e-04
Loss = 1.7894e-02, PNorm = 172.4874, GNorm = 0.2857, lr_0 = 4.0663e-04
Loss = 1.7298e-02, PNorm = 172.5116, GNorm = 0.6467, lr_0 = 4.0635e-04
Loss = 2.1191e-02, PNorm = 172.5383, GNorm = 0.4002, lr_0 = 4.0607e-04
Loss = 1.8036e-02, PNorm = 172.5652, GNorm = 0.2808, lr_0 = 4.0579e-04
Loss = 3.3713e-02, PNorm = 172.5907, GNorm = 0.2831, lr_0 = 4.0551e-04
Loss = 2.2812e-02, PNorm = 172.6158, GNorm = 0.3351, lr_0 = 4.0524e-04
Loss = 1.9498e-02, PNorm = 172.6394, GNorm = 0.3217, lr_0 = 4.0496e-04
Loss = 1.7752e-02, PNorm = 172.6614, GNorm = 0.1789, lr_0 = 4.0468e-04
Validation mae = 0.122321
Epoch 13
Loss = 1.6791e-02, PNorm = 172.6785, GNorm = 0.3774, lr_0 = 4.0440e-04
Loss = 2.0079e-02, PNorm = 172.6989, GNorm = 0.2178, lr_0 = 4.0413e-04
Loss = 2.0379e-02, PNorm = 172.7179, GNorm = 0.2111, lr_0 = 4.0385e-04
Loss = 1.5050e-02, PNorm = 172.7317, GNorm = 0.5872, lr_0 = 4.0357e-04
Loss = 1.8321e-02, PNorm = 172.7473, GNorm = 0.2894, lr_0 = 4.0330e-04
Loss = 1.4659e-02, PNorm = 172.7669, GNorm = 0.2450, lr_0 = 4.0302e-04
Loss = 1.5667e-02, PNorm = 172.7878, GNorm = 0.3928, lr_0 = 4.0274e-04
Loss = 1.2820e-02, PNorm = 172.8061, GNorm = 0.2163, lr_0 = 4.0247e-04
Loss = 1.9601e-02, PNorm = 172.8217, GNorm = 0.4797, lr_0 = 4.0219e-04
Loss = 1.8450e-02, PNorm = 172.8380, GNorm = 0.2889, lr_0 = 4.0192e-04
Loss = 1.5137e-02, PNorm = 172.8574, GNorm = 0.1714, lr_0 = 4.0164e-04
Loss = 1.5654e-02, PNorm = 172.8716, GNorm = 0.3070, lr_0 = 4.0137e-04
Loss = 1.8676e-02, PNorm = 172.8885, GNorm = 0.2495, lr_0 = 4.0109e-04
Loss = 1.7102e-02, PNorm = 172.9056, GNorm = 0.1728, lr_0 = 4.0082e-04
Loss = 1.8201e-02, PNorm = 172.9203, GNorm = 0.3731, lr_0 = 4.0054e-04
Loss = 1.3382e-02, PNorm = 172.9367, GNorm = 0.3785, lr_0 = 4.0027e-04
Loss = 1.5440e-02, PNorm = 172.9566, GNorm = 0.1704, lr_0 = 3.9999e-04
Loss = 1.5635e-02, PNorm = 172.9737, GNorm = 0.3459, lr_0 = 3.9972e-04
Loss = 1.3578e-02, PNorm = 172.9929, GNorm = 0.3111, lr_0 = 3.9945e-04
Loss = 1.5219e-02, PNorm = 173.0101, GNorm = 0.5105, lr_0 = 3.9917e-04
Loss = 1.3440e-02, PNorm = 173.0278, GNorm = 0.1965, lr_0 = 3.9890e-04
Loss = 1.6307e-02, PNorm = 173.0448, GNorm = 0.5053, lr_0 = 3.9863e-04
Loss = 1.5195e-02, PNorm = 173.0619, GNorm = 0.2535, lr_0 = 3.9835e-04
Loss = 1.3598e-02, PNorm = 173.0770, GNorm = 0.4009, lr_0 = 3.9808e-04
Loss = 2.0161e-02, PNorm = 173.0914, GNorm = 0.2485, lr_0 = 3.9781e-04
Loss = 1.4707e-02, PNorm = 173.1071, GNorm = 0.2822, lr_0 = 3.9753e-04
Loss = 2.4440e-02, PNorm = 173.1248, GNorm = 0.3506, lr_0 = 3.9726e-04
Loss = 1.8387e-02, PNorm = 173.1415, GNorm = 0.3938, lr_0 = 3.9699e-04
Loss = 1.4938e-02, PNorm = 173.1614, GNorm = 0.3059, lr_0 = 3.9672e-04
Loss = 1.5347e-02, PNorm = 173.1796, GNorm = 0.1696, lr_0 = 3.9645e-04
Loss = 1.3535e-02, PNorm = 173.1953, GNorm = 0.1952, lr_0 = 3.9617e-04
Loss = 1.0831e-02, PNorm = 173.2097, GNorm = 0.1868, lr_0 = 3.9590e-04
Loss = 2.0427e-02, PNorm = 173.2259, GNorm = 0.9285, lr_0 = 3.9563e-04
Loss = 1.5839e-02, PNorm = 173.2421, GNorm = 0.5914, lr_0 = 3.9536e-04
Loss = 1.3415e-02, PNorm = 173.2616, GNorm = 0.2520, lr_0 = 3.9509e-04
Loss = 1.8641e-02, PNorm = 173.2797, GNorm = 0.3054, lr_0 = 3.9482e-04
Loss = 1.4675e-02, PNorm = 173.2963, GNorm = 0.2000, lr_0 = 3.9455e-04
Loss = 1.4132e-02, PNorm = 173.3164, GNorm = 0.5751, lr_0 = 3.9428e-04
Loss = 2.0096e-02, PNorm = 173.3343, GNorm = 0.3347, lr_0 = 3.9401e-04
Loss = 1.3330e-02, PNorm = 173.3514, GNorm = 0.3897, lr_0 = 3.9374e-04
Loss = 1.4129e-02, PNorm = 173.3696, GNorm = 0.4350, lr_0 = 3.9347e-04
Loss = 1.3255e-02, PNorm = 173.3861, GNorm = 0.1494, lr_0 = 3.9320e-04
Loss = 1.8318e-02, PNorm = 173.4043, GNorm = 0.1877, lr_0 = 3.9293e-04
Loss = 1.3383e-02, PNorm = 173.4207, GNorm = 0.5101, lr_0 = 3.9266e-04
Loss = 2.0372e-02, PNorm = 173.4355, GNorm = 0.2606, lr_0 = 3.9239e-04
Loss = 1.8223e-02, PNorm = 173.4495, GNorm = 0.2202, lr_0 = 3.9212e-04
Loss = 1.5661e-02, PNorm = 173.4657, GNorm = 0.3353, lr_0 = 3.9185e-04
Loss = 1.8711e-02, PNorm = 173.4831, GNorm = 0.3746, lr_0 = 3.9159e-04
Loss = 1.4319e-02, PNorm = 173.5068, GNorm = 0.1680, lr_0 = 3.9132e-04
Loss = 1.8413e-02, PNorm = 173.5287, GNorm = 0.3116, lr_0 = 3.9105e-04
Loss = 1.7316e-02, PNorm = 173.5454, GNorm = 0.2148, lr_0 = 3.9078e-04
Loss = 2.1828e-02, PNorm = 173.5635, GNorm = 0.2268, lr_0 = 3.9051e-04
Loss = 1.8656e-02, PNorm = 173.5861, GNorm = 0.4338, lr_0 = 3.9025e-04
Loss = 1.5143e-02, PNorm = 173.6059, GNorm = 0.6531, lr_0 = 3.8998e-04
Loss = 2.0310e-02, PNorm = 173.6259, GNorm = 0.3317, lr_0 = 3.8971e-04
Loss = 1.7159e-02, PNorm = 173.6449, GNorm = 0.5078, lr_0 = 3.8945e-04
Loss = 1.3896e-02, PNorm = 173.6634, GNorm = 0.3239, lr_0 = 3.8918e-04
Loss = 1.2382e-02, PNorm = 173.6808, GNorm = 0.3130, lr_0 = 3.8891e-04
Loss = 1.2864e-02, PNorm = 173.7001, GNorm = 0.2241, lr_0 = 3.8865e-04
Loss = 1.3231e-02, PNorm = 173.7189, GNorm = 0.1825, lr_0 = 3.8838e-04
Loss = 1.7665e-02, PNorm = 173.7369, GNorm = 0.2651, lr_0 = 3.8811e-04
Loss = 1.3181e-02, PNorm = 173.7544, GNorm = 0.1686, lr_0 = 3.8785e-04
Loss = 1.4872e-02, PNorm = 173.7726, GNorm = 0.2289, lr_0 = 3.8758e-04
Loss = 1.1777e-02, PNorm = 173.7902, GNorm = 0.2424, lr_0 = 3.8732e-04
Loss = 1.4228e-02, PNorm = 173.8035, GNorm = 0.1394, lr_0 = 3.8705e-04
Loss = 1.3681e-02, PNorm = 173.8190, GNorm = 0.1679, lr_0 = 3.8679e-04
Loss = 1.3036e-02, PNorm = 173.8345, GNorm = 0.2718, lr_0 = 3.8652e-04
Loss = 1.3114e-02, PNorm = 173.8553, GNorm = 0.3078, lr_0 = 3.8626e-04
Loss = 2.0136e-02, PNorm = 173.8784, GNorm = 0.3441, lr_0 = 3.8599e-04
Loss = 1.3603e-02, PNorm = 173.8999, GNorm = 0.1733, lr_0 = 3.8573e-04
Loss = 1.3785e-02, PNorm = 173.9174, GNorm = 0.3022, lr_0 = 3.8546e-04
Loss = 1.6251e-02, PNorm = 173.9380, GNorm = 0.2260, lr_0 = 3.8520e-04
Loss = 1.7304e-02, PNorm = 173.9573, GNorm = 0.8656, lr_0 = 3.8493e-04
Loss = 1.3856e-02, PNorm = 173.9746, GNorm = 0.2266, lr_0 = 3.8467e-04
Loss = 2.0721e-02, PNorm = 173.9974, GNorm = 0.3696, lr_0 = 3.8441e-04
Loss = 1.6442e-02, PNorm = 174.0192, GNorm = 0.2630, lr_0 = 3.8414e-04
Loss = 1.7511e-02, PNorm = 174.0383, GNorm = 0.3377, lr_0 = 3.8388e-04
Loss = 1.3930e-02, PNorm = 174.0599, GNorm = 0.4078, lr_0 = 3.8362e-04
Loss = 1.9611e-02, PNorm = 174.0759, GNorm = 0.3485, lr_0 = 3.8336e-04
Loss = 1.3170e-02, PNorm = 174.0944, GNorm = 0.2323, lr_0 = 3.8309e-04
Loss = 1.7901e-02, PNorm = 174.1132, GNorm = 0.2659, lr_0 = 3.8283e-04
Loss = 1.6613e-02, PNorm = 174.1388, GNorm = 0.4176, lr_0 = 3.8257e-04
Loss = 1.5481e-02, PNorm = 174.1589, GNorm = 0.1911, lr_0 = 3.8231e-04
Loss = 2.6697e-02, PNorm = 174.1801, GNorm = 0.4830, lr_0 = 3.8204e-04
Loss = 1.6313e-02, PNorm = 174.1983, GNorm = 0.3333, lr_0 = 3.8178e-04
Loss = 2.2516e-02, PNorm = 174.2187, GNorm = 0.2618, lr_0 = 3.8152e-04
Loss = 1.7231e-02, PNorm = 174.2393, GNorm = 0.2530, lr_0 = 3.8126e-04
Loss = 1.6296e-02, PNorm = 174.2576, GNorm = 0.6838, lr_0 = 3.8100e-04
Loss = 1.3776e-02, PNorm = 174.2766, GNorm = 0.2693, lr_0 = 3.8074e-04
Loss = 1.2809e-02, PNorm = 174.2971, GNorm = 0.1881, lr_0 = 3.8048e-04
Loss = 1.3311e-02, PNorm = 174.3173, GNorm = 0.2012, lr_0 = 3.8022e-04
Loss = 1.4211e-02, PNorm = 174.3392, GNorm = 0.2311, lr_0 = 3.7995e-04
Loss = 2.0733e-02, PNorm = 174.3596, GNorm = 0.2461, lr_0 = 3.7969e-04
Loss = 1.8838e-02, PNorm = 174.3822, GNorm = 0.3622, lr_0 = 3.7943e-04
Loss = 2.5718e-02, PNorm = 174.4040, GNorm = 0.5448, lr_0 = 3.7917e-04
Loss = 1.4551e-02, PNorm = 174.4228, GNorm = 0.2603, lr_0 = 3.7891e-04
Loss = 3.1250e-02, PNorm = 174.4425, GNorm = 0.4350, lr_0 = 3.7866e-04
Loss = 2.2881e-02, PNorm = 174.4664, GNorm = 0.4479, lr_0 = 3.7840e-04
Loss = 1.5612e-02, PNorm = 174.4907, GNorm = 0.2988, lr_0 = 3.7814e-04
Loss = 1.6582e-02, PNorm = 174.5178, GNorm = 0.2400, lr_0 = 3.7788e-04
Loss = 1.6143e-02, PNorm = 174.5371, GNorm = 0.4588, lr_0 = 3.7762e-04
Loss = 1.6873e-02, PNorm = 174.5595, GNorm = 0.1989, lr_0 = 3.7736e-04
Loss = 1.2967e-02, PNorm = 174.5842, GNorm = 0.2668, lr_0 = 3.7710e-04
Loss = 1.5255e-02, PNorm = 174.6054, GNorm = 0.2203, lr_0 = 3.7684e-04
Loss = 1.5738e-02, PNorm = 174.6245, GNorm = 0.3269, lr_0 = 3.7659e-04
Loss = 1.6593e-02, PNorm = 174.6425, GNorm = 0.6068, lr_0 = 3.7633e-04
Loss = 1.7851e-02, PNorm = 174.6627, GNorm = 0.3774, lr_0 = 3.7607e-04
Loss = 1.6161e-02, PNorm = 174.6804, GNorm = 0.7419, lr_0 = 3.7581e-04
Loss = 1.5602e-02, PNorm = 174.6969, GNorm = 0.4923, lr_0 = 3.7555e-04
Loss = 2.3092e-02, PNorm = 174.7156, GNorm = 0.1490, lr_0 = 3.7530e-04
Loss = 2.5751e-02, PNorm = 174.7326, GNorm = 0.3723, lr_0 = 3.7504e-04
Loss = 1.4189e-02, PNorm = 174.7499, GNorm = 0.1898, lr_0 = 3.7478e-04
Loss = 1.2700e-02, PNorm = 174.7680, GNorm = 0.2419, lr_0 = 3.7453e-04
Loss = 1.5116e-02, PNorm = 174.7885, GNorm = 0.2823, lr_0 = 3.7427e-04
Loss = 1.7751e-02, PNorm = 174.8111, GNorm = 0.2871, lr_0 = 3.7401e-04
Loss = 1.3270e-02, PNorm = 174.8311, GNorm = 0.1836, lr_0 = 3.7376e-04
Loss = 1.7430e-02, PNorm = 174.8454, GNorm = 0.2171, lr_0 = 3.7350e-04
Loss = 1.7611e-02, PNorm = 174.8610, GNorm = 0.2498, lr_0 = 3.7325e-04
Loss = 1.3521e-02, PNorm = 174.8784, GNorm = 0.2171, lr_0 = 3.7299e-04
Loss = 1.5788e-02, PNorm = 174.8986, GNorm = 0.2920, lr_0 = 3.7273e-04
Validation mae = 0.121659
Epoch 14
Loss = 1.6393e-02, PNorm = 174.9155, GNorm = 0.2368, lr_0 = 3.7248e-04
Loss = 1.6051e-02, PNorm = 174.9333, GNorm = 0.3587, lr_0 = 3.7222e-04
Loss = 1.5388e-02, PNorm = 174.9452, GNorm = 0.2714, lr_0 = 3.7197e-04
Loss = 1.2762e-02, PNorm = 174.9564, GNorm = 0.4083, lr_0 = 3.7171e-04
Loss = 1.3909e-02, PNorm = 174.9694, GNorm = 0.2211, lr_0 = 3.7146e-04
Loss = 1.6696e-02, PNorm = 174.9841, GNorm = 0.3439, lr_0 = 3.7120e-04
Loss = 1.2162e-02, PNorm = 174.9967, GNorm = 0.2108, lr_0 = 3.7095e-04
Loss = 1.4115e-02, PNorm = 175.0116, GNorm = 0.1761, lr_0 = 3.7070e-04
Loss = 1.2673e-02, PNorm = 175.0283, GNorm = 0.2951, lr_0 = 3.7044e-04
Loss = 1.3686e-02, PNorm = 175.0452, GNorm = 0.1365, lr_0 = 3.7019e-04
Loss = 1.2485e-02, PNorm = 175.0609, GNorm = 0.2328, lr_0 = 3.6993e-04
Loss = 1.0928e-02, PNorm = 175.0743, GNorm = 0.1333, lr_0 = 3.6968e-04
Loss = 1.7579e-02, PNorm = 175.0875, GNorm = 0.2819, lr_0 = 3.6943e-04
Loss = 1.5198e-02, PNorm = 175.1014, GNorm = 0.1938, lr_0 = 3.6917e-04
Loss = 1.3773e-02, PNorm = 175.1134, GNorm = 0.1364, lr_0 = 3.6892e-04
Loss = 1.6505e-02, PNorm = 175.1295, GNorm = 0.4789, lr_0 = 3.6867e-04
Loss = 2.0114e-02, PNorm = 175.1425, GNorm = 0.1842, lr_0 = 3.6842e-04
Loss = 1.2125e-02, PNorm = 175.1529, GNorm = 0.1667, lr_0 = 3.6816e-04
Loss = 1.3598e-02, PNorm = 175.1674, GNorm = 0.1465, lr_0 = 3.6791e-04
Loss = 1.1007e-02, PNorm = 175.1802, GNorm = 0.1277, lr_0 = 3.6766e-04
Loss = 1.5510e-02, PNorm = 175.1919, GNorm = 0.4954, lr_0 = 3.6741e-04
Loss = 1.5093e-02, PNorm = 175.2041, GNorm = 0.2490, lr_0 = 3.6716e-04
Loss = 1.0841e-02, PNorm = 175.2181, GNorm = 0.1935, lr_0 = 3.6690e-04
Loss = 1.1679e-02, PNorm = 175.2344, GNorm = 0.1629, lr_0 = 3.6665e-04
Loss = 1.4033e-02, PNorm = 175.2520, GNorm = 0.2205, lr_0 = 3.6640e-04
Loss = 1.3469e-02, PNorm = 175.2662, GNorm = 0.3204, lr_0 = 3.6615e-04
Loss = 1.7179e-02, PNorm = 175.2817, GNorm = 0.5044, lr_0 = 3.6590e-04
Loss = 1.3223e-02, PNorm = 175.2996, GNorm = 0.2187, lr_0 = 3.6565e-04
Loss = 1.0991e-02, PNorm = 175.3127, GNorm = 0.1779, lr_0 = 3.6540e-04
Loss = 1.5481e-02, PNorm = 175.3272, GNorm = 0.1542, lr_0 = 3.6515e-04
Loss = 1.3212e-02, PNorm = 175.3431, GNorm = 0.3627, lr_0 = 3.6490e-04
Loss = 9.7837e-03, PNorm = 175.3575, GNorm = 0.1700, lr_0 = 3.6465e-04
Loss = 1.2836e-02, PNorm = 175.3728, GNorm = 0.1274, lr_0 = 3.6440e-04
Loss = 1.7611e-02, PNorm = 175.3873, GNorm = 0.8294, lr_0 = 3.6415e-04
Loss = 1.2528e-02, PNorm = 175.4018, GNorm = 0.2965, lr_0 = 3.6390e-04
Loss = 1.1264e-02, PNorm = 175.4157, GNorm = 0.2863, lr_0 = 3.6365e-04
Loss = 1.8746e-02, PNorm = 175.4328, GNorm = 0.1422, lr_0 = 3.6340e-04
Loss = 1.1500e-02, PNorm = 175.4473, GNorm = 0.2747, lr_0 = 3.6315e-04
Loss = 1.0042e-02, PNorm = 175.4595, GNorm = 0.1862, lr_0 = 3.6290e-04
Loss = 1.2217e-02, PNorm = 175.4738, GNorm = 0.1450, lr_0 = 3.6266e-04
Loss = 1.3452e-02, PNorm = 175.4878, GNorm = 0.1778, lr_0 = 3.6241e-04
Loss = 1.6810e-02, PNorm = 175.5022, GNorm = 0.2123, lr_0 = 3.6216e-04
Loss = 1.5899e-02, PNorm = 175.5166, GNorm = 0.3307, lr_0 = 3.6191e-04
Loss = 1.2506e-02, PNorm = 175.5322, GNorm = 0.3879, lr_0 = 3.6166e-04
Loss = 1.1810e-02, PNorm = 175.5478, GNorm = 0.5617, lr_0 = 3.6141e-04
Loss = 1.0266e-02, PNorm = 175.5617, GNorm = 0.2331, lr_0 = 3.6117e-04
Loss = 1.9758e-02, PNorm = 175.5755, GNorm = 0.2708, lr_0 = 3.6092e-04
Loss = 1.4270e-02, PNorm = 175.5891, GNorm = 0.2188, lr_0 = 3.6067e-04
Loss = 1.1806e-02, PNorm = 175.6012, GNorm = 0.3336, lr_0 = 3.6043e-04
Loss = 1.3051e-02, PNorm = 175.6199, GNorm = 0.4165, lr_0 = 3.6018e-04
Loss = 1.2053e-02, PNorm = 175.6338, GNorm = 0.3444, lr_0 = 3.5993e-04
Loss = 1.3334e-02, PNorm = 175.6471, GNorm = 0.2566, lr_0 = 3.5969e-04
Loss = 1.1585e-02, PNorm = 175.6607, GNorm = 0.4774, lr_0 = 3.5944e-04
Loss = 1.3924e-02, PNorm = 175.6758, GNorm = 0.1861, lr_0 = 3.5919e-04
Loss = 2.0038e-02, PNorm = 175.6911, GNorm = 0.2478, lr_0 = 3.5895e-04
Loss = 1.1837e-02, PNorm = 175.7030, GNorm = 0.3108, lr_0 = 3.5870e-04
Loss = 1.6291e-02, PNorm = 175.7225, GNorm = 0.3274, lr_0 = 3.5845e-04
Loss = 1.5547e-02, PNorm = 175.7381, GNorm = 0.3023, lr_0 = 3.5821e-04
Loss = 1.2863e-02, PNorm = 175.7533, GNorm = 0.1610, lr_0 = 3.5796e-04
Loss = 1.5288e-02, PNorm = 175.7685, GNorm = 0.2794, lr_0 = 3.5772e-04
Loss = 1.0748e-02, PNorm = 175.7830, GNorm = 0.2391, lr_0 = 3.5747e-04
Loss = 1.3411e-02, PNorm = 175.7972, GNorm = 0.1578, lr_0 = 3.5723e-04
Loss = 1.1065e-02, PNorm = 175.8132, GNorm = 0.3351, lr_0 = 3.5698e-04
Loss = 1.0195e-02, PNorm = 175.8288, GNorm = 0.1970, lr_0 = 3.5674e-04
Loss = 1.7567e-02, PNorm = 175.8454, GNorm = 0.2108, lr_0 = 3.5650e-04
Loss = 1.8479e-02, PNorm = 175.8633, GNorm = 0.2489, lr_0 = 3.5625e-04
Loss = 1.0448e-02, PNorm = 175.8813, GNorm = 0.3572, lr_0 = 3.5601e-04
Loss = 1.5221e-02, PNorm = 175.8967, GNorm = 0.4689, lr_0 = 3.5576e-04
Loss = 1.2641e-02, PNorm = 175.9114, GNorm = 0.2813, lr_0 = 3.5552e-04
Loss = 1.1284e-02, PNorm = 175.9257, GNorm = 0.2271, lr_0 = 3.5528e-04
Loss = 1.6935e-02, PNorm = 175.9445, GNorm = 0.2972, lr_0 = 3.5503e-04
Loss = 1.1819e-02, PNorm = 175.9622, GNorm = 0.1636, lr_0 = 3.5479e-04
Loss = 1.1810e-02, PNorm = 175.9763, GNorm = 0.1308, lr_0 = 3.5455e-04
Loss = 1.3978e-02, PNorm = 175.9934, GNorm = 0.2845, lr_0 = 3.5430e-04
Loss = 1.7251e-02, PNorm = 176.0084, GNorm = 0.2060, lr_0 = 3.5406e-04
Loss = 1.2767e-02, PNorm = 176.0240, GNorm = 0.1019, lr_0 = 3.5382e-04
Loss = 1.4776e-02, PNorm = 176.0417, GNorm = 0.1801, lr_0 = 3.5358e-04
Loss = 1.4311e-02, PNorm = 176.0563, GNorm = 0.9422, lr_0 = 3.5333e-04
Loss = 1.4951e-02, PNorm = 176.0725, GNorm = 0.2935, lr_0 = 3.5309e-04
Loss = 1.4613e-02, PNorm = 176.0940, GNorm = 0.6604, lr_0 = 3.5285e-04
Loss = 1.4551e-02, PNorm = 176.1123, GNorm = 0.2321, lr_0 = 3.5261e-04
Loss = 1.3775e-02, PNorm = 176.1288, GNorm = 0.1930, lr_0 = 3.5237e-04
Loss = 1.1724e-02, PNorm = 176.1433, GNorm = 0.1950, lr_0 = 3.5212e-04
Loss = 1.9985e-02, PNorm = 176.1616, GNorm = 0.6631, lr_0 = 3.5188e-04
Loss = 1.7627e-02, PNorm = 176.1784, GNorm = 0.3254, lr_0 = 3.5164e-04
Loss = 1.2506e-02, PNorm = 176.1968, GNorm = 0.1604, lr_0 = 3.5140e-04
Loss = 1.4115e-02, PNorm = 176.2127, GNorm = 0.2702, lr_0 = 3.5116e-04
Loss = 1.4100e-02, PNorm = 176.2252, GNorm = 0.2945, lr_0 = 3.5092e-04
Loss = 1.4470e-02, PNorm = 176.2405, GNorm = 0.4614, lr_0 = 3.5068e-04
Loss = 1.4385e-02, PNorm = 176.2593, GNorm = 0.2912, lr_0 = 3.5044e-04
Loss = 1.4174e-02, PNorm = 176.2769, GNorm = 0.1997, lr_0 = 3.5020e-04
Loss = 1.5476e-02, PNorm = 176.2944, GNorm = 0.1954, lr_0 = 3.4996e-04
Loss = 1.2217e-02, PNorm = 176.3124, GNorm = 0.2142, lr_0 = 3.4972e-04
Loss = 1.0732e-02, PNorm = 176.3289, GNorm = 0.1703, lr_0 = 3.4948e-04
Loss = 1.0318e-02, PNorm = 176.3454, GNorm = 0.7369, lr_0 = 3.4924e-04
Loss = 1.1667e-02, PNorm = 176.3658, GNorm = 0.1666, lr_0 = 3.4900e-04
Loss = 1.2886e-02, PNorm = 176.3835, GNorm = 0.2420, lr_0 = 3.4876e-04
Loss = 1.2357e-02, PNorm = 176.4008, GNorm = 0.4797, lr_0 = 3.4852e-04
Loss = 1.3848e-02, PNorm = 176.4150, GNorm = 0.4497, lr_0 = 3.4828e-04
Loss = 1.8129e-02, PNorm = 176.4321, GNorm = 0.5294, lr_0 = 3.4805e-04
Loss = 1.1875e-02, PNorm = 176.4491, GNorm = 0.3590, lr_0 = 3.4781e-04
Loss = 1.2254e-02, PNorm = 176.4684, GNorm = 0.6215, lr_0 = 3.4757e-04
Loss = 1.3666e-02, PNorm = 176.4875, GNorm = 0.2597, lr_0 = 3.4733e-04
Loss = 1.0653e-02, PNorm = 176.5060, GNorm = 0.3626, lr_0 = 3.4709e-04
Loss = 9.7460e-03, PNorm = 176.5222, GNorm = 0.3908, lr_0 = 3.4686e-04
Loss = 1.8159e-02, PNorm = 176.5358, GNorm = 0.2904, lr_0 = 3.4662e-04
Loss = 1.5425e-02, PNorm = 176.5489, GNorm = 0.1872, lr_0 = 3.4638e-04
Loss = 1.5664e-02, PNorm = 176.5633, GNorm = 0.1458, lr_0 = 3.4614e-04
Loss = 1.7536e-02, PNorm = 176.5782, GNorm = 1.0912, lr_0 = 3.4591e-04
Loss = 1.2572e-02, PNorm = 176.5933, GNorm = 0.1769, lr_0 = 3.4567e-04
Loss = 1.3805e-02, PNorm = 176.6104, GNorm = 0.2504, lr_0 = 3.4543e-04
Loss = 1.4126e-02, PNorm = 176.6280, GNorm = 0.2093, lr_0 = 3.4520e-04
Loss = 1.6580e-02, PNorm = 176.6443, GNorm = 0.3562, lr_0 = 3.4496e-04
Loss = 3.3881e-02, PNorm = 176.6664, GNorm = 0.4407, lr_0 = 3.4472e-04
Loss = 2.4856e-02, PNorm = 176.6875, GNorm = 0.3168, lr_0 = 3.4449e-04
Loss = 1.7250e-02, PNorm = 176.7097, GNorm = 0.3264, lr_0 = 3.4425e-04
Loss = 1.3067e-02, PNorm = 176.7315, GNorm = 0.1756, lr_0 = 3.4402e-04
Loss = 1.1942e-02, PNorm = 176.7522, GNorm = 0.3628, lr_0 = 3.4378e-04
Loss = 1.7518e-02, PNorm = 176.7729, GNorm = 0.2718, lr_0 = 3.4354e-04
Loss = 1.1662e-02, PNorm = 176.7907, GNorm = 0.3027, lr_0 = 3.4331e-04
Validation mae = 0.121639
Epoch 15
Loss = 1.1015e-02, PNorm = 176.8027, GNorm = 0.2322, lr_0 = 3.4307e-04
Loss = 1.5872e-02, PNorm = 176.8127, GNorm = 0.2393, lr_0 = 3.4284e-04
Loss = 1.2250e-02, PNorm = 176.8227, GNorm = 0.2214, lr_0 = 3.4260e-04
Loss = 1.0094e-02, PNorm = 176.8307, GNorm = 0.1237, lr_0 = 3.4237e-04
Loss = 1.5351e-02, PNorm = 176.8409, GNorm = 0.2075, lr_0 = 3.4213e-04
Loss = 1.0708e-02, PNorm = 176.8520, GNorm = 0.1240, lr_0 = 3.4190e-04
Loss = 1.7235e-02, PNorm = 176.8599, GNorm = 0.2865, lr_0 = 3.4167e-04
Loss = 1.0128e-02, PNorm = 176.8689, GNorm = 0.2071, lr_0 = 3.4143e-04
Loss = 1.3079e-02, PNorm = 176.8807, GNorm = 0.3300, lr_0 = 3.4120e-04
Loss = 1.2186e-02, PNorm = 176.8937, GNorm = 0.1659, lr_0 = 3.4096e-04
Loss = 1.2230e-02, PNorm = 176.9077, GNorm = 0.3073, lr_0 = 3.4073e-04
Loss = 1.1006e-02, PNorm = 176.9198, GNorm = 0.3292, lr_0 = 3.4050e-04
Loss = 1.2494e-02, PNorm = 176.9294, GNorm = 0.3101, lr_0 = 3.4026e-04
Loss = 1.4061e-02, PNorm = 176.9403, GNorm = 0.2996, lr_0 = 3.4003e-04
Loss = 1.2532e-02, PNorm = 176.9522, GNorm = 0.2984, lr_0 = 3.3980e-04
Loss = 1.2355e-02, PNorm = 176.9649, GNorm = 0.3144, lr_0 = 3.3956e-04
Loss = 1.7109e-02, PNorm = 176.9782, GNorm = 0.3548, lr_0 = 3.3933e-04
Loss = 1.5403e-02, PNorm = 176.9901, GNorm = 0.9422, lr_0 = 3.3910e-04
Loss = 1.1873e-02, PNorm = 177.0053, GNorm = 0.4921, lr_0 = 3.3887e-04
Loss = 1.1452e-02, PNorm = 177.0179, GNorm = 0.2017, lr_0 = 3.3864e-04
Loss = 9.3745e-03, PNorm = 177.0288, GNorm = 0.3928, lr_0 = 3.3840e-04
Loss = 1.7878e-02, PNorm = 177.0396, GNorm = 0.2063, lr_0 = 3.3817e-04
Loss = 1.0727e-02, PNorm = 177.0523, GNorm = 0.1766, lr_0 = 3.3794e-04
Loss = 1.2145e-02, PNorm = 177.0681, GNorm = 0.1841, lr_0 = 3.3771e-04
Loss = 1.0867e-02, PNorm = 177.0831, GNorm = 0.2347, lr_0 = 3.3748e-04
Loss = 1.0315e-02, PNorm = 177.0981, GNorm = 0.2281, lr_0 = 3.3725e-04
Loss = 1.3121e-02, PNorm = 177.1126, GNorm = 0.2342, lr_0 = 3.3701e-04
Loss = 1.0775e-02, PNorm = 177.1219, GNorm = 0.3867, lr_0 = 3.3678e-04
Loss = 1.0202e-02, PNorm = 177.1330, GNorm = 0.1426, lr_0 = 3.3655e-04
Loss = 1.0611e-02, PNorm = 177.1464, GNorm = 0.2247, lr_0 = 3.3632e-04
Loss = 1.3909e-02, PNorm = 177.1584, GNorm = 0.2508, lr_0 = 3.3609e-04
Loss = 1.4995e-02, PNorm = 177.1687, GNorm = 0.2182, lr_0 = 3.3586e-04
Loss = 1.2310e-02, PNorm = 177.1823, GNorm = 0.2294, lr_0 = 3.3563e-04
Loss = 1.2466e-02, PNorm = 177.1940, GNorm = 0.1260, lr_0 = 3.3540e-04
Loss = 1.8067e-02, PNorm = 177.2045, GNorm = 1.3151, lr_0 = 3.3517e-04
Loss = 1.1596e-02, PNorm = 177.2167, GNorm = 0.1976, lr_0 = 3.3494e-04
Loss = 1.3878e-02, PNorm = 177.2289, GNorm = 0.4070, lr_0 = 3.3471e-04
Loss = 1.7689e-02, PNorm = 177.2422, GNorm = 0.3329, lr_0 = 3.3448e-04
Loss = 1.0843e-02, PNorm = 177.2557, GNorm = 0.3820, lr_0 = 3.3425e-04
Loss = 1.1632e-02, PNorm = 177.2704, GNorm = 0.1943, lr_0 = 3.3403e-04
Loss = 1.1429e-02, PNorm = 177.2873, GNorm = 0.2361, lr_0 = 3.3380e-04
Loss = 1.0341e-02, PNorm = 177.2965, GNorm = 0.3128, lr_0 = 3.3357e-04
Loss = 1.3419e-02, PNorm = 177.3047, GNorm = 0.4361, lr_0 = 3.3334e-04
Loss = 1.0612e-02, PNorm = 177.3163, GNorm = 0.2034, lr_0 = 3.3311e-04
Loss = 1.0095e-02, PNorm = 177.3260, GNorm = 0.3505, lr_0 = 3.3288e-04
Loss = 9.3623e-03, PNorm = 177.3354, GNorm = 0.1911, lr_0 = 3.3265e-04
Loss = 1.2015e-02, PNorm = 177.3483, GNorm = 0.2513, lr_0 = 3.3243e-04
Loss = 1.0412e-02, PNorm = 177.3610, GNorm = 0.3484, lr_0 = 3.3220e-04
Loss = 9.8172e-03, PNorm = 177.3746, GNorm = 0.1780, lr_0 = 3.3197e-04
Loss = 1.3079e-02, PNorm = 177.3863, GNorm = 0.3711, lr_0 = 3.3174e-04
Loss = 9.9204e-03, PNorm = 177.4007, GNorm = 0.1504, lr_0 = 3.3152e-04
Loss = 9.5643e-03, PNorm = 177.4129, GNorm = 0.2261, lr_0 = 3.3129e-04
Loss = 1.2830e-02, PNorm = 177.4263, GNorm = 0.1854, lr_0 = 3.3106e-04
Loss = 9.9735e-03, PNorm = 177.4369, GNorm = 0.1154, lr_0 = 3.3084e-04
Loss = 1.0471e-02, PNorm = 177.4472, GNorm = 0.2567, lr_0 = 3.3061e-04
Loss = 1.0510e-02, PNorm = 177.4568, GNorm = 0.1349, lr_0 = 3.3038e-04
Loss = 9.9381e-03, PNorm = 177.4708, GNorm = 0.4457, lr_0 = 3.3016e-04
Loss = 9.8146e-03, PNorm = 177.4826, GNorm = 0.3486, lr_0 = 3.2993e-04
Loss = 1.2724e-02, PNorm = 177.4931, GNorm = 0.2251, lr_0 = 3.2970e-04
Loss = 1.1007e-02, PNorm = 177.5049, GNorm = 0.3470, lr_0 = 3.2948e-04
Loss = 1.1429e-02, PNorm = 177.5199, GNorm = 0.1504, lr_0 = 3.2925e-04
Loss = 1.0533e-02, PNorm = 177.5307, GNorm = 0.1615, lr_0 = 3.2903e-04
Loss = 1.3379e-02, PNorm = 177.5431, GNorm = 0.4595, lr_0 = 3.2880e-04
Loss = 1.3825e-02, PNorm = 177.5570, GNorm = 0.2793, lr_0 = 3.2858e-04
Loss = 1.2628e-02, PNorm = 177.5683, GNorm = 0.2321, lr_0 = 3.2835e-04
Loss = 1.9014e-02, PNorm = 177.5800, GNorm = 1.7041, lr_0 = 3.2813e-04
Loss = 1.6295e-02, PNorm = 177.5977, GNorm = 0.2113, lr_0 = 3.2790e-04
Loss = 9.6285e-03, PNorm = 177.6121, GNorm = 0.1720, lr_0 = 3.2768e-04
Loss = 1.1535e-02, PNorm = 177.6253, GNorm = 0.3644, lr_0 = 3.2745e-04
Loss = 1.2255e-02, PNorm = 177.6414, GNorm = 0.3626, lr_0 = 3.2723e-04
Loss = 9.2608e-03, PNorm = 177.6544, GNorm = 0.1773, lr_0 = 3.2700e-04
Loss = 2.2514e-02, PNorm = 177.6704, GNorm = 0.2704, lr_0 = 3.2678e-04
Loss = 1.2060e-02, PNorm = 177.6875, GNorm = 0.2914, lr_0 = 3.2656e-04
Loss = 1.0096e-02, PNorm = 177.7044, GNorm = 0.6363, lr_0 = 3.2633e-04
Loss = 1.1411e-02, PNorm = 177.7172, GNorm = 0.3100, lr_0 = 3.2611e-04
Loss = 1.1727e-02, PNorm = 177.7304, GNorm = 0.4668, lr_0 = 3.2589e-04
Loss = 1.2213e-02, PNorm = 177.7423, GNorm = 0.1546, lr_0 = 3.2566e-04
Loss = 1.1096e-02, PNorm = 177.7540, GNorm = 0.3884, lr_0 = 3.2544e-04
Loss = 1.0412e-02, PNorm = 177.7677, GNorm = 0.2433, lr_0 = 3.2522e-04
Loss = 1.0144e-02, PNorm = 177.7825, GNorm = 0.1687, lr_0 = 3.2499e-04
Loss = 1.1818e-02, PNorm = 177.7966, GNorm = 0.3963, lr_0 = 3.2477e-04
Loss = 1.2497e-02, PNorm = 177.8148, GNorm = 0.1126, lr_0 = 3.2455e-04
Loss = 1.0899e-02, PNorm = 177.8308, GNorm = 0.2393, lr_0 = 3.2433e-04
Loss = 9.9164e-03, PNorm = 177.8463, GNorm = 0.4330, lr_0 = 3.2410e-04
Loss = 1.4846e-02, PNorm = 177.8611, GNorm = 0.1486, lr_0 = 3.2388e-04
Loss = 9.0612e-03, PNorm = 177.8745, GNorm = 0.1255, lr_0 = 3.2366e-04
Loss = 1.6744e-02, PNorm = 177.8856, GNorm = 0.2757, lr_0 = 3.2344e-04
Loss = 1.5939e-02, PNorm = 177.8961, GNorm = 0.4345, lr_0 = 3.2322e-04
Loss = 1.1874e-02, PNorm = 177.9064, GNorm = 0.3176, lr_0 = 3.2300e-04
Loss = 1.0588e-02, PNorm = 177.9253, GNorm = 0.1761, lr_0 = 3.2277e-04
Loss = 2.5447e-02, PNorm = 177.9383, GNorm = 0.2953, lr_0 = 3.2255e-04
Loss = 1.2270e-02, PNorm = 177.9494, GNorm = 0.2277, lr_0 = 3.2233e-04
Loss = 1.0926e-02, PNorm = 177.9621, GNorm = 0.1264, lr_0 = 3.2211e-04
Loss = 1.2415e-02, PNorm = 177.9748, GNorm = 0.2139, lr_0 = 3.2189e-04
Loss = 1.2658e-02, PNorm = 177.9897, GNorm = 0.1981, lr_0 = 3.2167e-04
Loss = 1.5461e-02, PNorm = 178.0056, GNorm = 0.2627, lr_0 = 3.2145e-04
Loss = 1.1468e-02, PNorm = 178.0222, GNorm = 0.2964, lr_0 = 3.2123e-04
Loss = 1.0563e-02, PNorm = 178.0384, GNorm = 0.2818, lr_0 = 3.2101e-04
Loss = 1.2699e-02, PNorm = 178.0552, GNorm = 0.3354, lr_0 = 3.2079e-04
Loss = 1.2296e-02, PNorm = 178.0726, GNorm = 0.2166, lr_0 = 3.2057e-04
Loss = 8.7216e-03, PNorm = 178.0871, GNorm = 0.3066, lr_0 = 3.2035e-04
Loss = 9.9207e-03, PNorm = 178.0991, GNorm = 0.2766, lr_0 = 3.2013e-04
Loss = 9.8853e-03, PNorm = 178.1129, GNorm = 0.7023, lr_0 = 3.1991e-04
Loss = 1.8297e-02, PNorm = 178.1279, GNorm = 0.2162, lr_0 = 3.1969e-04
Loss = 1.4055e-02, PNorm = 178.1418, GNorm = 0.2760, lr_0 = 3.1947e-04
Loss = 9.6581e-03, PNorm = 178.1526, GNorm = 0.3077, lr_0 = 3.1925e-04
Loss = 1.2666e-02, PNorm = 178.1657, GNorm = 0.1570, lr_0 = 3.1904e-04
Loss = 1.9547e-02, PNorm = 178.1820, GNorm = 0.2477, lr_0 = 3.1882e-04
Loss = 1.2565e-02, PNorm = 178.1966, GNorm = 0.3010, lr_0 = 3.1860e-04
Loss = 9.4210e-03, PNorm = 178.2117, GNorm = 0.3041, lr_0 = 3.1838e-04
Loss = 1.8816e-02, PNorm = 178.2221, GNorm = 0.6645, lr_0 = 3.1816e-04
Loss = 9.7144e-03, PNorm = 178.2394, GNorm = 0.4311, lr_0 = 3.1794e-04
Loss = 1.3625e-02, PNorm = 178.2516, GNorm = 0.4813, lr_0 = 3.1773e-04
Loss = 1.1265e-02, PNorm = 178.2660, GNorm = 0.3991, lr_0 = 3.1751e-04
Loss = 1.5071e-02, PNorm = 178.2815, GNorm = 0.2914, lr_0 = 3.1729e-04
Loss = 1.0793e-02, PNorm = 178.2995, GNorm = 0.4838, lr_0 = 3.1707e-04
Loss = 1.3645e-02, PNorm = 178.3174, GNorm = 0.3927, lr_0 = 3.1686e-04
Loss = 8.5553e-03, PNorm = 178.3319, GNorm = 0.2931, lr_0 = 3.1664e-04
Loss = 1.0821e-02, PNorm = 178.3454, GNorm = 0.1391, lr_0 = 3.1642e-04
Loss = 1.3176e-02, PNorm = 178.3615, GNorm = 0.4292, lr_0 = 3.1621e-04
Validation mae = 0.121771
Epoch 16
Loss = 1.0782e-02, PNorm = 178.3768, GNorm = 0.4942, lr_0 = 3.1599e-04
Loss = 1.3386e-02, PNorm = 178.3873, GNorm = 0.2666, lr_0 = 3.1577e-04
Loss = 1.4265e-02, PNorm = 178.3974, GNorm = 0.2912, lr_0 = 3.1556e-04
Loss = 1.1413e-02, PNorm = 178.4045, GNorm = 0.2130, lr_0 = 3.1534e-04
Loss = 1.3282e-02, PNorm = 178.4109, GNorm = 0.1853, lr_0 = 3.1512e-04
Loss = 8.6281e-03, PNorm = 178.4199, GNorm = 0.2209, lr_0 = 3.1491e-04
Loss = 9.1320e-03, PNorm = 178.4304, GNorm = 0.3124, lr_0 = 3.1469e-04
Loss = 1.2989e-02, PNorm = 178.4431, GNorm = 0.2605, lr_0 = 3.1448e-04
Loss = 1.7801e-02, PNorm = 178.4541, GNorm = 0.3253, lr_0 = 3.1426e-04
Loss = 1.1321e-02, PNorm = 178.4632, GNorm = 0.2560, lr_0 = 3.1405e-04
Loss = 8.6463e-03, PNorm = 178.4725, GNorm = 0.2260, lr_0 = 3.1383e-04
Loss = 1.0928e-02, PNorm = 178.4846, GNorm = 0.3328, lr_0 = 3.1362e-04
Loss = 1.0155e-02, PNorm = 178.4950, GNorm = 0.2219, lr_0 = 3.1340e-04
Loss = 9.6306e-03, PNorm = 178.5061, GNorm = 0.2971, lr_0 = 3.1319e-04
Loss = 9.7554e-03, PNorm = 178.5175, GNorm = 0.4283, lr_0 = 3.1297e-04
Loss = 1.1322e-02, PNorm = 178.5286, GNorm = 0.2733, lr_0 = 3.1276e-04
Loss = 1.1860e-02, PNorm = 178.5369, GNorm = 0.2272, lr_0 = 3.1254e-04
Loss = 1.0807e-02, PNorm = 178.5468, GNorm = 0.2204, lr_0 = 3.1233e-04
Loss = 1.0828e-02, PNorm = 178.5558, GNorm = 0.3018, lr_0 = 3.1212e-04
Loss = 1.3633e-02, PNorm = 178.5650, GNorm = 0.2429, lr_0 = 3.1190e-04
Loss = 8.0764e-03, PNorm = 178.5766, GNorm = 0.1173, lr_0 = 3.1169e-04
Loss = 8.4664e-03, PNorm = 178.5874, GNorm = 0.1632, lr_0 = 3.1147e-04
Loss = 1.0182e-02, PNorm = 178.5987, GNorm = 0.4241, lr_0 = 3.1126e-04
Loss = 1.3081e-02, PNorm = 178.6086, GNorm = 0.1630, lr_0 = 3.1105e-04
Loss = 8.4128e-03, PNorm = 178.6192, GNorm = 0.2188, lr_0 = 3.1083e-04
Loss = 9.1496e-03, PNorm = 178.6302, GNorm = 0.1741, lr_0 = 3.1062e-04
Loss = 8.8192e-03, PNorm = 178.6395, GNorm = 0.2660, lr_0 = 3.1041e-04
Loss = 1.1303e-02, PNorm = 178.6502, GNorm = 0.1048, lr_0 = 3.1020e-04
Loss = 8.5403e-03, PNorm = 178.6600, GNorm = 0.2278, lr_0 = 3.0998e-04
Loss = 1.2267e-02, PNorm = 178.6684, GNorm = 0.4214, lr_0 = 3.0977e-04
Loss = 8.5873e-03, PNorm = 178.6776, GNorm = 0.3229, lr_0 = 3.0956e-04
Loss = 1.1693e-02, PNorm = 178.6874, GNorm = 0.4990, lr_0 = 3.0935e-04
Loss = 8.7979e-03, PNorm = 178.6961, GNorm = 0.2007, lr_0 = 3.0914e-04
Loss = 9.3772e-03, PNorm = 178.7065, GNorm = 0.3455, lr_0 = 3.0892e-04
Loss = 1.1714e-02, PNorm = 178.7186, GNorm = 0.1841, lr_0 = 3.0871e-04
Loss = 1.2203e-02, PNorm = 178.7301, GNorm = 0.1432, lr_0 = 3.0850e-04
Loss = 1.3011e-02, PNorm = 178.7432, GNorm = 0.7718, lr_0 = 3.0829e-04
Loss = 8.8325e-03, PNorm = 178.7548, GNorm = 0.1779, lr_0 = 3.0808e-04
Loss = 7.9295e-03, PNorm = 178.7679, GNorm = 0.1701, lr_0 = 3.0787e-04
Loss = 1.0640e-02, PNorm = 178.7806, GNorm = 0.2789, lr_0 = 3.0766e-04
Loss = 1.0807e-02, PNorm = 178.7925, GNorm = 0.2309, lr_0 = 3.0745e-04
Loss = 8.0803e-03, PNorm = 178.8014, GNorm = 0.1954, lr_0 = 3.0723e-04
Loss = 1.1414e-02, PNorm = 178.8127, GNorm = 0.2484, lr_0 = 3.0702e-04
Loss = 9.0405e-03, PNorm = 178.8210, GNorm = 0.3724, lr_0 = 3.0681e-04
Loss = 1.1699e-02, PNorm = 178.8304, GNorm = 0.4028, lr_0 = 3.0660e-04
Loss = 9.9860e-03, PNorm = 178.8381, GNorm = 0.2871, lr_0 = 3.0639e-04
Loss = 1.6790e-02, PNorm = 178.8450, GNorm = 0.2038, lr_0 = 3.0618e-04
Loss = 1.0436e-02, PNorm = 178.8558, GNorm = 0.2013, lr_0 = 3.0597e-04
Loss = 1.1801e-02, PNorm = 178.8662, GNorm = 0.2286, lr_0 = 3.0576e-04
Loss = 1.1782e-02, PNorm = 178.8776, GNorm = 0.2090, lr_0 = 3.0555e-04
Loss = 1.5659e-02, PNorm = 178.8898, GNorm = 0.1441, lr_0 = 3.0535e-04
Loss = 1.1878e-02, PNorm = 178.9024, GNorm = 0.7769, lr_0 = 3.0514e-04
Loss = 9.0940e-03, PNorm = 178.9135, GNorm = 0.3831, lr_0 = 3.0493e-04
Loss = 1.1813e-02, PNorm = 178.9232, GNorm = 0.3718, lr_0 = 3.0472e-04
Loss = 8.5565e-03, PNorm = 178.9348, GNorm = 0.2482, lr_0 = 3.0451e-04
Loss = 1.1233e-02, PNorm = 178.9443, GNorm = 0.1853, lr_0 = 3.0430e-04
Loss = 1.0557e-02, PNorm = 178.9563, GNorm = 0.1758, lr_0 = 3.0409e-04
Loss = 8.6369e-03, PNorm = 178.9658, GNorm = 0.3611, lr_0 = 3.0388e-04
Loss = 1.0989e-02, PNorm = 178.9760, GNorm = 0.2540, lr_0 = 3.0368e-04
Loss = 8.9745e-03, PNorm = 178.9838, GNorm = 0.1979, lr_0 = 3.0347e-04
Loss = 9.0030e-03, PNorm = 178.9949, GNorm = 0.2078, lr_0 = 3.0326e-04
Loss = 8.7921e-03, PNorm = 179.0073, GNorm = 0.1105, lr_0 = 3.0305e-04
Loss = 1.0161e-02, PNorm = 179.0178, GNorm = 0.1633, lr_0 = 3.0284e-04
Loss = 8.2723e-03, PNorm = 179.0272, GNorm = 0.1897, lr_0 = 3.0264e-04
Loss = 8.9531e-03, PNorm = 179.0382, GNorm = 0.2593, lr_0 = 3.0243e-04
Loss = 9.0859e-03, PNorm = 179.0482, GNorm = 0.2265, lr_0 = 3.0222e-04
Loss = 1.0897e-02, PNorm = 179.0585, GNorm = 0.2734, lr_0 = 3.0202e-04
Loss = 8.5201e-03, PNorm = 179.0693, GNorm = 0.2384, lr_0 = 3.0181e-04
Loss = 1.1557e-02, PNorm = 179.0792, GNorm = 0.1409, lr_0 = 3.0160e-04
Loss = 7.3806e-03, PNorm = 179.0893, GNorm = 0.1367, lr_0 = 3.0140e-04
Loss = 1.1934e-02, PNorm = 179.0993, GNorm = 0.1872, lr_0 = 3.0119e-04
Loss = 1.4672e-02, PNorm = 179.1122, GNorm = 0.2214, lr_0 = 3.0098e-04
Loss = 1.4139e-02, PNorm = 179.1245, GNorm = 0.2992, lr_0 = 3.0078e-04
Loss = 1.1684e-02, PNorm = 179.1392, GNorm = 0.3527, lr_0 = 3.0057e-04
Loss = 1.1374e-02, PNorm = 179.1485, GNorm = 0.2161, lr_0 = 3.0036e-04
Loss = 1.0469e-02, PNorm = 179.1576, GNorm = 0.2684, lr_0 = 3.0016e-04
Loss = 1.0087e-02, PNorm = 179.1705, GNorm = 0.2561, lr_0 = 2.9995e-04
Loss = 9.9143e-03, PNorm = 179.1807, GNorm = 0.1921, lr_0 = 2.9975e-04
Loss = 9.5196e-03, PNorm = 179.1939, GNorm = 0.2122, lr_0 = 2.9954e-04
Loss = 8.0371e-03, PNorm = 179.2078, GNorm = 0.4205, lr_0 = 2.9934e-04
Loss = 8.8499e-03, PNorm = 179.2208, GNorm = 0.2301, lr_0 = 2.9913e-04
Loss = 1.4226e-02, PNorm = 179.2319, GNorm = 0.2653, lr_0 = 2.9893e-04
Loss = 1.1071e-02, PNorm = 179.2422, GNorm = 0.1050, lr_0 = 2.9872e-04
Loss = 7.8844e-03, PNorm = 179.2551, GNorm = 0.2139, lr_0 = 2.9852e-04
Loss = 1.1254e-02, PNorm = 179.2664, GNorm = 0.2700, lr_0 = 2.9831e-04
Loss = 8.1965e-03, PNorm = 179.2778, GNorm = 0.1382, lr_0 = 2.9811e-04
Loss = 1.0352e-02, PNorm = 179.2887, GNorm = 0.0907, lr_0 = 2.9790e-04
Loss = 7.8453e-03, PNorm = 179.2971, GNorm = 0.1504, lr_0 = 2.9770e-04
Loss = 1.8129e-02, PNorm = 179.3090, GNorm = 0.2423, lr_0 = 2.9750e-04
Loss = 9.4575e-03, PNorm = 179.3184, GNorm = 0.2167, lr_0 = 2.9729e-04
Loss = 1.5917e-02, PNorm = 179.3303, GNorm = 0.2927, lr_0 = 2.9709e-04
Loss = 1.0765e-02, PNorm = 179.3417, GNorm = 0.3036, lr_0 = 2.9689e-04
Loss = 1.2751e-02, PNorm = 179.3537, GNorm = 0.3188, lr_0 = 2.9668e-04
Loss = 9.2213e-03, PNorm = 179.3658, GNorm = 0.2507, lr_0 = 2.9648e-04
Loss = 1.0587e-02, PNorm = 179.3804, GNorm = 0.3256, lr_0 = 2.9628e-04
Loss = 9.4037e-03, PNorm = 179.3925, GNorm = 0.3216, lr_0 = 2.9607e-04
Loss = 1.4147e-02, PNorm = 179.4044, GNorm = 0.7693, lr_0 = 2.9587e-04
Loss = 1.0378e-02, PNorm = 179.4170, GNorm = 0.2516, lr_0 = 2.9567e-04
Loss = 1.0445e-02, PNorm = 179.4292, GNorm = 0.2046, lr_0 = 2.9546e-04
Loss = 1.5657e-02, PNorm = 179.4409, GNorm = 0.5335, lr_0 = 2.9526e-04
Loss = 9.3161e-03, PNorm = 179.4519, GNorm = 0.2243, lr_0 = 2.9506e-04
Loss = 7.6686e-03, PNorm = 179.4647, GNorm = 0.2067, lr_0 = 2.9486e-04
Loss = 1.1848e-02, PNorm = 179.4759, GNorm = 0.2960, lr_0 = 2.9466e-04
Loss = 1.0138e-02, PNorm = 179.4902, GNorm = 0.4770, lr_0 = 2.9445e-04
Loss = 1.4949e-02, PNorm = 179.5033, GNorm = 0.3365, lr_0 = 2.9425e-04
Loss = 9.3590e-03, PNorm = 179.5165, GNorm = 0.1744, lr_0 = 2.9405e-04
Loss = 8.9000e-03, PNorm = 179.5282, GNorm = 0.1345, lr_0 = 2.9385e-04
Loss = 1.0046e-02, PNorm = 179.5414, GNorm = 0.3434, lr_0 = 2.9365e-04
Loss = 9.3357e-03, PNorm = 179.5544, GNorm = 0.1237, lr_0 = 2.9345e-04
Loss = 7.5304e-03, PNorm = 179.5660, GNorm = 0.2334, lr_0 = 2.9325e-04
Loss = 8.4955e-03, PNorm = 179.5766, GNorm = 0.3634, lr_0 = 2.9305e-04
Loss = 7.6111e-03, PNorm = 179.5877, GNorm = 0.2844, lr_0 = 2.9284e-04
Loss = 1.3876e-02, PNorm = 179.5970, GNorm = 0.1518, lr_0 = 2.9264e-04
Loss = 8.5544e-03, PNorm = 179.6068, GNorm = 0.1450, lr_0 = 2.9244e-04
Loss = 1.6187e-02, PNorm = 179.6153, GNorm = 0.1982, lr_0 = 2.9224e-04
Loss = 1.2203e-02, PNorm = 179.6221, GNorm = 0.4218, lr_0 = 2.9204e-04
Loss = 1.3092e-02, PNorm = 179.6368, GNorm = 0.3209, lr_0 = 2.9184e-04
Loss = 8.2654e-03, PNorm = 179.6494, GNorm = 0.2729, lr_0 = 2.9164e-04
Loss = 1.0553e-02, PNorm = 179.6607, GNorm = 0.1503, lr_0 = 2.9144e-04
Loss = 7.9080e-03, PNorm = 179.6726, GNorm = 0.2241, lr_0 = 2.9124e-04
Validation mae = 0.121635
Epoch 17
Loss = 8.6787e-03, PNorm = 179.6836, GNorm = 0.2964, lr_0 = 2.9104e-04
Loss = 9.3663e-03, PNorm = 179.6927, GNorm = 0.2871, lr_0 = 2.9084e-04
Loss = 7.4064e-03, PNorm = 179.7002, GNorm = 0.1955, lr_0 = 2.9065e-04
Loss = 9.6050e-03, PNorm = 179.7091, GNorm = 0.3093, lr_0 = 2.9045e-04
Loss = 1.1097e-02, PNorm = 179.7181, GNorm = 0.4708, lr_0 = 2.9025e-04
Loss = 9.1289e-03, PNorm = 179.7291, GNorm = 0.1473, lr_0 = 2.9005e-04
Loss = 6.9463e-03, PNorm = 179.7373, GNorm = 0.1268, lr_0 = 2.8985e-04
Loss = 7.8166e-03, PNorm = 179.7458, GNorm = 0.1447, lr_0 = 2.8965e-04
Loss = 8.9192e-03, PNorm = 179.7550, GNorm = 0.2466, lr_0 = 2.8945e-04
Loss = 8.8416e-03, PNorm = 179.7640, GNorm = 0.2777, lr_0 = 2.8925e-04
Loss = 8.1845e-03, PNorm = 179.7750, GNorm = 0.1724, lr_0 = 2.8906e-04
Loss = 1.0222e-02, PNorm = 179.7831, GNorm = 0.5253, lr_0 = 2.8886e-04
Loss = 7.5970e-03, PNorm = 179.7904, GNorm = 0.3138, lr_0 = 2.8866e-04
Loss = 7.9183e-03, PNorm = 179.7980, GNorm = 0.1132, lr_0 = 2.8846e-04
Loss = 7.3079e-03, PNorm = 179.8059, GNorm = 0.1201, lr_0 = 2.8826e-04
Loss = 7.3234e-03, PNorm = 179.8155, GNorm = 0.1421, lr_0 = 2.8807e-04
Loss = 8.9903e-03, PNorm = 179.8264, GNorm = 0.2587, lr_0 = 2.8787e-04
Loss = 7.7469e-03, PNorm = 179.8380, GNorm = 0.4408, lr_0 = 2.8767e-04
Loss = 1.4212e-02, PNorm = 179.8451, GNorm = 0.4600, lr_0 = 2.8748e-04
Loss = 8.4329e-03, PNorm = 179.8542, GNorm = 0.1588, lr_0 = 2.8728e-04
Loss = 7.2070e-03, PNorm = 179.8617, GNorm = 0.1257, lr_0 = 2.8708e-04
Loss = 9.8480e-03, PNorm = 179.8711, GNorm = 0.1463, lr_0 = 2.8689e-04
Loss = 1.1422e-02, PNorm = 179.8818, GNorm = 0.1894, lr_0 = 2.8669e-04
Loss = 8.7794e-03, PNorm = 179.8914, GNorm = 0.5213, lr_0 = 2.8649e-04
Loss = 1.3653e-02, PNorm = 179.8999, GNorm = 0.3009, lr_0 = 2.8630e-04
Loss = 9.4942e-03, PNorm = 179.9086, GNorm = 0.1319, lr_0 = 2.8610e-04
Loss = 8.0433e-03, PNorm = 179.9163, GNorm = 0.4795, lr_0 = 2.8590e-04
Loss = 9.4311e-03, PNorm = 179.9232, GNorm = 0.1649, lr_0 = 2.8571e-04
Loss = 9.0810e-03, PNorm = 179.9320, GNorm = 0.1093, lr_0 = 2.8551e-04
Loss = 1.1889e-02, PNorm = 179.9403, GNorm = 0.3077, lr_0 = 2.8532e-04
Loss = 6.7329e-03, PNorm = 179.9502, GNorm = 0.1760, lr_0 = 2.8512e-04
Loss = 1.0405e-02, PNorm = 179.9592, GNorm = 0.1334, lr_0 = 2.8493e-04
Loss = 7.9225e-03, PNorm = 179.9652, GNorm = 0.1089, lr_0 = 2.8473e-04
Loss = 1.0055e-02, PNorm = 179.9750, GNorm = 0.5780, lr_0 = 2.8454e-04
Loss = 1.0698e-02, PNorm = 179.9862, GNorm = 0.1894, lr_0 = 2.8434e-04
Loss = 8.6393e-03, PNorm = 179.9945, GNorm = 0.2436, lr_0 = 2.8415e-04
Loss = 1.2042e-02, PNorm = 179.9992, GNorm = 0.1753, lr_0 = 2.8395e-04
Loss = 7.1531e-03, PNorm = 180.0073, GNorm = 0.1866, lr_0 = 2.8376e-04
Loss = 6.9854e-03, PNorm = 180.0171, GNorm = 0.1661, lr_0 = 2.8356e-04
Loss = 1.2361e-02, PNorm = 180.0232, GNorm = 0.3622, lr_0 = 2.8337e-04
Loss = 1.7282e-02, PNorm = 180.0306, GNorm = 0.3093, lr_0 = 2.8317e-04
Loss = 8.4510e-03, PNorm = 180.0385, GNorm = 0.1715, lr_0 = 2.8298e-04
Loss = 6.0967e-03, PNorm = 180.0479, GNorm = 0.1177, lr_0 = 2.8279e-04
Loss = 6.9622e-03, PNorm = 180.0573, GNorm = 0.2512, lr_0 = 2.8259e-04
Loss = 7.7926e-03, PNorm = 180.0652, GNorm = 0.1413, lr_0 = 2.8240e-04
Loss = 6.9222e-03, PNorm = 180.0751, GNorm = 0.2511, lr_0 = 2.8221e-04
Loss = 1.4022e-02, PNorm = 180.0806, GNorm = 0.1383, lr_0 = 2.8201e-04
Loss = 7.0809e-03, PNorm = 180.0852, GNorm = 0.2299, lr_0 = 2.8182e-04
Loss = 1.7172e-02, PNorm = 180.0936, GNorm = 0.2345, lr_0 = 2.8163e-04
Loss = 8.5887e-03, PNorm = 180.1068, GNorm = 0.3909, lr_0 = 2.8143e-04
Loss = 8.8204e-03, PNorm = 180.1212, GNorm = 0.4344, lr_0 = 2.8124e-04
Loss = 1.3389e-02, PNorm = 180.1315, GNorm = 0.2378, lr_0 = 2.8105e-04
Loss = 1.0306e-02, PNorm = 180.1418, GNorm = 0.1076, lr_0 = 2.8085e-04
Loss = 9.2905e-03, PNorm = 180.1510, GNorm = 0.1728, lr_0 = 2.8066e-04
Loss = 7.6251e-03, PNorm = 180.1626, GNorm = 0.1981, lr_0 = 2.8047e-04
Loss = 5.9250e-03, PNorm = 180.1714, GNorm = 0.1081, lr_0 = 2.8028e-04
Loss = 7.2951e-03, PNorm = 180.1787, GNorm = 0.1429, lr_0 = 2.8009e-04
Loss = 7.3394e-03, PNorm = 180.1859, GNorm = 0.1450, lr_0 = 2.7989e-04
Loss = 1.2724e-02, PNorm = 180.1940, GNorm = 0.2406, lr_0 = 2.7970e-04
Loss = 8.1592e-03, PNorm = 180.2038, GNorm = 0.3696, lr_0 = 2.7951e-04
Loss = 8.6138e-03, PNorm = 180.2161, GNorm = 0.1846, lr_0 = 2.7932e-04
Loss = 8.8210e-03, PNorm = 180.2274, GNorm = 0.1489, lr_0 = 2.7913e-04
Loss = 8.2349e-03, PNorm = 180.2373, GNorm = 0.1523, lr_0 = 2.7894e-04
Loss = 9.0582e-03, PNorm = 180.2462, GNorm = 0.2216, lr_0 = 2.7875e-04
Loss = 2.0308e-02, PNorm = 180.2580, GNorm = 0.2736, lr_0 = 2.7855e-04
Loss = 9.2620e-03, PNorm = 180.2663, GNorm = 0.2434, lr_0 = 2.7836e-04
Loss = 1.0877e-02, PNorm = 180.2750, GNorm = 0.3759, lr_0 = 2.7817e-04
Loss = 7.5595e-03, PNorm = 180.2871, GNorm = 0.2378, lr_0 = 2.7798e-04
Loss = 9.4793e-03, PNorm = 180.2972, GNorm = 0.3153, lr_0 = 2.7779e-04
Loss = 7.5973e-03, PNorm = 180.3065, GNorm = 0.3171, lr_0 = 2.7760e-04
Loss = 7.8362e-03, PNorm = 180.3157, GNorm = 0.1314, lr_0 = 2.7741e-04
Loss = 7.2903e-03, PNorm = 180.3245, GNorm = 0.1768, lr_0 = 2.7722e-04
Loss = 1.2314e-02, PNorm = 180.3355, GNorm = 0.3488, lr_0 = 2.7703e-04
Loss = 1.2111e-02, PNorm = 180.3450, GNorm = 0.1239, lr_0 = 2.7684e-04
Loss = 1.0855e-02, PNorm = 180.3534, GNorm = 0.1965, lr_0 = 2.7665e-04
Loss = 6.5872e-03, PNorm = 180.3611, GNorm = 0.1840, lr_0 = 2.7646e-04
Loss = 1.0312e-02, PNorm = 180.3672, GNorm = 0.1418, lr_0 = 2.7627e-04
Loss = 1.0549e-02, PNorm = 180.3767, GNorm = 0.1857, lr_0 = 2.7608e-04
Loss = 7.3379e-03, PNorm = 180.3858, GNorm = 0.1128, lr_0 = 2.7590e-04
Loss = 7.3249e-03, PNorm = 180.3972, GNorm = 0.1208, lr_0 = 2.7571e-04
Loss = 8.2133e-03, PNorm = 180.4078, GNorm = 0.1743, lr_0 = 2.7552e-04
Loss = 9.7318e-03, PNorm = 180.4165, GNorm = 0.1703, lr_0 = 2.7533e-04
Loss = 1.0569e-02, PNorm = 180.4218, GNorm = 0.1499, lr_0 = 2.7514e-04
Loss = 6.9386e-03, PNorm = 180.4299, GNorm = 0.1730, lr_0 = 2.7495e-04
Loss = 1.6677e-02, PNorm = 180.4383, GNorm = 0.1741, lr_0 = 2.7476e-04
Loss = 7.3751e-03, PNorm = 180.4474, GNorm = 0.1750, lr_0 = 2.7457e-04
Loss = 7.7087e-03, PNorm = 180.4587, GNorm = 0.4109, lr_0 = 2.7439e-04
Loss = 1.0924e-02, PNorm = 180.4695, GNorm = 0.1330, lr_0 = 2.7420e-04
Loss = 1.4193e-02, PNorm = 180.4783, GNorm = 0.1074, lr_0 = 2.7401e-04
Loss = 7.2494e-03, PNorm = 180.4883, GNorm = 0.3027, lr_0 = 2.7382e-04
Loss = 1.1327e-02, PNorm = 180.4984, GNorm = 0.4993, lr_0 = 2.7364e-04
Loss = 8.0681e-03, PNorm = 180.5119, GNorm = 0.1472, lr_0 = 2.7345e-04
Loss = 1.0365e-02, PNorm = 180.5230, GNorm = 0.2583, lr_0 = 2.7326e-04
Loss = 6.8655e-03, PNorm = 180.5355, GNorm = 0.1513, lr_0 = 2.7307e-04
Loss = 7.8035e-03, PNorm = 180.5450, GNorm = 0.1781, lr_0 = 2.7289e-04
Loss = 1.2379e-02, PNorm = 180.5547, GNorm = 0.0894, lr_0 = 2.7270e-04
Loss = 1.1140e-02, PNorm = 180.5637, GNorm = 0.2647, lr_0 = 2.7251e-04
Loss = 1.1284e-02, PNorm = 180.5739, GNorm = 0.3105, lr_0 = 2.7233e-04
Loss = 1.0389e-02, PNorm = 180.5840, GNorm = 0.0941, lr_0 = 2.7214e-04
Loss = 7.2534e-03, PNorm = 180.5923, GNorm = 0.1189, lr_0 = 2.7195e-04
Loss = 6.8191e-03, PNorm = 180.6016, GNorm = 0.1738, lr_0 = 2.7177e-04
Loss = 1.0337e-02, PNorm = 180.6112, GNorm = 0.1731, lr_0 = 2.7158e-04
Loss = 1.1669e-02, PNorm = 180.6201, GNorm = 0.2809, lr_0 = 2.7139e-04
Loss = 6.3076e-03, PNorm = 180.6292, GNorm = 0.2300, lr_0 = 2.7121e-04
Loss = 9.6460e-03, PNorm = 180.6399, GNorm = 0.1316, lr_0 = 2.7102e-04
Loss = 7.9476e-03, PNorm = 180.6474, GNorm = 0.2089, lr_0 = 2.7084e-04
Loss = 1.0730e-02, PNorm = 180.6572, GNorm = 0.1742, lr_0 = 2.7065e-04
Loss = 8.0372e-03, PNorm = 180.6662, GNorm = 0.1538, lr_0 = 2.7047e-04
Loss = 6.8974e-03, PNorm = 180.6760, GNorm = 0.2426, lr_0 = 2.7028e-04
Loss = 5.9184e-03, PNorm = 180.6855, GNorm = 0.2505, lr_0 = 2.7010e-04
Loss = 1.6803e-02, PNorm = 180.6899, GNorm = 0.3524, lr_0 = 2.6991e-04
Loss = 1.0685e-02, PNorm = 180.7027, GNorm = 0.4612, lr_0 = 2.6973e-04
Loss = 8.0701e-03, PNorm = 180.7157, GNorm = 0.1558, lr_0 = 2.6954e-04
Loss = 7.2532e-03, PNorm = 180.7276, GNorm = 0.1321, lr_0 = 2.6936e-04
Loss = 6.8671e-03, PNorm = 180.7412, GNorm = 0.1828, lr_0 = 2.6917e-04
Loss = 7.9575e-03, PNorm = 180.7511, GNorm = 0.1943, lr_0 = 2.6899e-04
Loss = 9.7239e-03, PNorm = 180.7603, GNorm = 0.1448, lr_0 = 2.6880e-04
Loss = 6.0759e-03, PNorm = 180.7703, GNorm = 0.1447, lr_0 = 2.6862e-04
Loss = 1.1267e-02, PNorm = 180.7778, GNorm = 0.4111, lr_0 = 2.6844e-04
Loss = 7.9633e-03, PNorm = 180.7860, GNorm = 0.1873, lr_0 = 2.6825e-04
Validation mae = 0.121308
Epoch 18
Loss = 8.1911e-03, PNorm = 180.7940, GNorm = 0.2544, lr_0 = 2.6807e-04
Loss = 9.3602e-03, PNorm = 180.8023, GNorm = 0.5435, lr_0 = 2.6788e-04
Loss = 6.7432e-03, PNorm = 180.8059, GNorm = 0.2740, lr_0 = 2.6770e-04
Loss = 7.2544e-03, PNorm = 180.8091, GNorm = 0.1818, lr_0 = 2.6752e-04
Loss = 1.1545e-02, PNorm = 180.8166, GNorm = 0.2340, lr_0 = 2.6733e-04
Loss = 8.4690e-03, PNorm = 180.8242, GNorm = 0.4043, lr_0 = 2.6715e-04
Loss = 9.1018e-03, PNorm = 180.8307, GNorm = 0.2847, lr_0 = 2.6697e-04
Loss = 5.4330e-03, PNorm = 180.8367, GNorm = 0.2626, lr_0 = 2.6678e-04
Loss = 5.6243e-03, PNorm = 180.8426, GNorm = 0.1377, lr_0 = 2.6660e-04
Loss = 9.3981e-03, PNorm = 180.8486, GNorm = 0.1722, lr_0 = 2.6642e-04
Loss = 6.5103e-03, PNorm = 180.8580, GNorm = 0.2779, lr_0 = 2.6624e-04
Loss = 9.8972e-03, PNorm = 180.8633, GNorm = 0.4458, lr_0 = 2.6605e-04
Loss = 4.7232e-03, PNorm = 180.8704, GNorm = 0.2018, lr_0 = 2.6587e-04
Loss = 8.7364e-03, PNorm = 180.8781, GNorm = 0.3553, lr_0 = 2.6569e-04
Loss = 5.3852e-03, PNorm = 180.8848, GNorm = 0.2556, lr_0 = 2.6551e-04
Loss = 1.0522e-02, PNorm = 180.8896, GNorm = 0.1780, lr_0 = 2.6533e-04
Loss = 8.4853e-03, PNorm = 180.8956, GNorm = 0.1920, lr_0 = 2.6514e-04
Loss = 5.3090e-03, PNorm = 180.9028, GNorm = 0.3947, lr_0 = 2.6496e-04
Loss = 8.5002e-03, PNorm = 180.9100, GNorm = 0.1211, lr_0 = 2.6478e-04
Loss = 7.1220e-03, PNorm = 180.9180, GNorm = 0.1539, lr_0 = 2.6460e-04
Loss = 9.5907e-03, PNorm = 180.9252, GNorm = 0.2041, lr_0 = 2.6442e-04
Loss = 6.5398e-03, PNorm = 180.9306, GNorm = 0.1168, lr_0 = 2.6424e-04
Loss = 6.0026e-03, PNorm = 180.9374, GNorm = 0.2618, lr_0 = 2.6406e-04
Loss = 6.6046e-03, PNorm = 180.9421, GNorm = 0.1771, lr_0 = 2.6388e-04
Loss = 5.5545e-03, PNorm = 180.9468, GNorm = 0.2452, lr_0 = 2.6369e-04
Loss = 7.7297e-03, PNorm = 180.9513, GNorm = 0.3890, lr_0 = 2.6351e-04
Loss = 7.1385e-03, PNorm = 180.9572, GNorm = 0.1569, lr_0 = 2.6333e-04
Loss = 5.4492e-03, PNorm = 180.9669, GNorm = 0.1247, lr_0 = 2.6315e-04
Loss = 5.5308e-03, PNorm = 180.9751, GNorm = 0.1468, lr_0 = 2.6297e-04
Loss = 7.8476e-03, PNorm = 180.9847, GNorm = 0.1978, lr_0 = 2.6279e-04
Loss = 5.3796e-03, PNorm = 180.9907, GNorm = 0.2245, lr_0 = 2.6261e-04
Loss = 9.6743e-03, PNorm = 180.9948, GNorm = 0.1960, lr_0 = 2.6243e-04
Loss = 9.7741e-03, PNorm = 181.0012, GNorm = 0.2499, lr_0 = 2.6225e-04
Loss = 8.3855e-03, PNorm = 181.0095, GNorm = 0.1032, lr_0 = 2.6207e-04
Loss = 6.3334e-03, PNorm = 181.0176, GNorm = 0.1587, lr_0 = 2.6189e-04
Loss = 6.3131e-03, PNorm = 181.0274, GNorm = 0.2005, lr_0 = 2.6171e-04
Loss = 1.4019e-02, PNorm = 181.0353, GNorm = 0.3632, lr_0 = 2.6153e-04
Loss = 7.0891e-03, PNorm = 181.0452, GNorm = 0.1762, lr_0 = 2.6136e-04
Loss = 7.6978e-03, PNorm = 181.0540, GNorm = 0.1111, lr_0 = 2.6118e-04
Loss = 7.7974e-03, PNorm = 181.0624, GNorm = 0.2447, lr_0 = 2.6100e-04
Loss = 1.1253e-02, PNorm = 181.0730, GNorm = 0.8349, lr_0 = 2.6082e-04
Loss = 6.8397e-03, PNorm = 181.0838, GNorm = 0.1692, lr_0 = 2.6064e-04
Loss = 7.5124e-03, PNorm = 181.0930, GNorm = 0.2562, lr_0 = 2.6046e-04
Loss = 7.1776e-03, PNorm = 181.1039, GNorm = 0.2624, lr_0 = 2.6028e-04
Loss = 6.2415e-03, PNorm = 181.1110, GNorm = 0.3894, lr_0 = 2.6011e-04
Loss = 6.8810e-03, PNorm = 181.1188, GNorm = 0.2209, lr_0 = 2.5993e-04
Loss = 7.1148e-03, PNorm = 181.1249, GNorm = 0.2700, lr_0 = 2.5975e-04
Loss = 7.0200e-03, PNorm = 181.1334, GNorm = 0.1400, lr_0 = 2.5957e-04
Loss = 8.2691e-03, PNorm = 181.1409, GNorm = 0.2765, lr_0 = 2.5939e-04
Loss = 7.3091e-03, PNorm = 181.1487, GNorm = 0.1581, lr_0 = 2.5922e-04
Loss = 5.9901e-03, PNorm = 181.1541, GNorm = 0.1961, lr_0 = 2.5904e-04
Loss = 1.0751e-02, PNorm = 181.1619, GNorm = 0.2215, lr_0 = 2.5886e-04
Loss = 6.5194e-03, PNorm = 181.1686, GNorm = 0.1897, lr_0 = 2.5868e-04
Loss = 6.3414e-03, PNorm = 181.1730, GNorm = 0.1519, lr_0 = 2.5851e-04
Loss = 6.6570e-03, PNorm = 181.1794, GNorm = 0.1353, lr_0 = 2.5833e-04
Loss = 1.0693e-02, PNorm = 181.1853, GNorm = 0.2627, lr_0 = 2.5815e-04
Loss = 8.9674e-03, PNorm = 181.1904, GNorm = 0.1251, lr_0 = 2.5797e-04
Loss = 1.0080e-02, PNorm = 181.1981, GNorm = 0.1558, lr_0 = 2.5780e-04
Loss = 5.1909e-03, PNorm = 181.2066, GNorm = 0.1471, lr_0 = 2.5762e-04
Loss = 7.5725e-03, PNorm = 181.2138, GNorm = 0.1621, lr_0 = 2.5745e-04
Loss = 6.3175e-03, PNorm = 181.2205, GNorm = 0.1834, lr_0 = 2.5727e-04
Loss = 1.3055e-02, PNorm = 181.2285, GNorm = 0.0933, lr_0 = 2.5709e-04
Loss = 8.7688e-03, PNorm = 181.2348, GNorm = 0.1438, lr_0 = 2.5692e-04
Loss = 5.4322e-03, PNorm = 181.2410, GNorm = 0.1632, lr_0 = 2.5674e-04
Loss = 5.4101e-03, PNorm = 181.2477, GNorm = 0.1419, lr_0 = 2.5656e-04
Loss = 1.4452e-02, PNorm = 181.2552, GNorm = 0.7058, lr_0 = 2.5639e-04
Loss = 6.0016e-03, PNorm = 181.2634, GNorm = 0.1641, lr_0 = 2.5621e-04
Loss = 6.4354e-03, PNorm = 181.2698, GNorm = 0.1304, lr_0 = 2.5604e-04
Loss = 1.1940e-02, PNorm = 181.2782, GNorm = 0.2498, lr_0 = 2.5586e-04
Loss = 8.1400e-03, PNorm = 181.2888, GNorm = 0.3723, lr_0 = 2.5569e-04
Loss = 7.2200e-03, PNorm = 181.2959, GNorm = 0.2469, lr_0 = 2.5551e-04
Loss = 1.1912e-02, PNorm = 181.3052, GNorm = 0.3344, lr_0 = 2.5534e-04
Loss = 5.5838e-03, PNorm = 181.3133, GNorm = 0.1820, lr_0 = 2.5516e-04
Loss = 7.4972e-03, PNorm = 181.3198, GNorm = 0.2466, lr_0 = 2.5499e-04
Loss = 7.2500e-03, PNorm = 181.3295, GNorm = 0.1766, lr_0 = 2.5481e-04
Loss = 6.4750e-03, PNorm = 181.3372, GNorm = 0.2096, lr_0 = 2.5464e-04
Loss = 1.0038e-02, PNorm = 181.3443, GNorm = 0.1548, lr_0 = 2.5446e-04
Loss = 9.3512e-03, PNorm = 181.3518, GNorm = 0.1908, lr_0 = 2.5429e-04
Loss = 1.2904e-02, PNorm = 181.3595, GNorm = 0.1288, lr_0 = 2.5411e-04
Loss = 8.3689e-03, PNorm = 181.3678, GNorm = 0.2127, lr_0 = 2.5394e-04
Loss = 5.8096e-03, PNorm = 181.3760, GNorm = 0.2992, lr_0 = 2.5377e-04
Loss = 6.9176e-03, PNorm = 181.3855, GNorm = 0.2981, lr_0 = 2.5359e-04
Loss = 6.7347e-03, PNorm = 181.3934, GNorm = 0.3245, lr_0 = 2.5342e-04
Loss = 6.4690e-03, PNorm = 181.4001, GNorm = 0.3147, lr_0 = 2.5325e-04
Loss = 6.5617e-03, PNorm = 181.4091, GNorm = 0.1430, lr_0 = 2.5307e-04
Loss = 5.5087e-03, PNorm = 181.4183, GNorm = 0.2083, lr_0 = 2.5290e-04
Loss = 7.7382e-03, PNorm = 181.4248, GNorm = 0.1090, lr_0 = 2.5273e-04
Loss = 6.2412e-03, PNorm = 181.4337, GNorm = 0.2006, lr_0 = 2.5255e-04
Loss = 6.8427e-03, PNorm = 181.4429, GNorm = 0.1407, lr_0 = 2.5238e-04
Loss = 6.9479e-03, PNorm = 181.4518, GNorm = 0.4942, lr_0 = 2.5221e-04
Loss = 5.0975e-03, PNorm = 181.4616, GNorm = 0.0920, lr_0 = 2.5203e-04
Loss = 1.0441e-02, PNorm = 181.4685, GNorm = 0.2744, lr_0 = 2.5186e-04
Loss = 5.6706e-03, PNorm = 181.4768, GNorm = 0.2893, lr_0 = 2.5169e-04
Loss = 7.2205e-03, PNorm = 181.4839, GNorm = 0.2097, lr_0 = 2.5152e-04
Loss = 7.2544e-03, PNorm = 181.4916, GNorm = 0.1120, lr_0 = 2.5134e-04
Loss = 1.0042e-02, PNorm = 181.4984, GNorm = 0.3130, lr_0 = 2.5117e-04
Loss = 8.6306e-03, PNorm = 181.5066, GNorm = 0.1071, lr_0 = 2.5100e-04
Loss = 7.9749e-03, PNorm = 181.5161, GNorm = 0.1937, lr_0 = 2.5083e-04
Loss = 1.0395e-02, PNorm = 181.5270, GNorm = 0.1678, lr_0 = 2.5066e-04
Loss = 6.9928e-03, PNorm = 181.5362, GNorm = 0.0864, lr_0 = 2.5048e-04
Loss = 6.2442e-03, PNorm = 181.5446, GNorm = 0.1567, lr_0 = 2.5031e-04
Loss = 7.4823e-03, PNorm = 181.5521, GNorm = 0.0913, lr_0 = 2.5014e-04
Loss = 8.3429e-03, PNorm = 181.5603, GNorm = 0.2654, lr_0 = 2.4997e-04
Loss = 9.9457e-03, PNorm = 181.5673, GNorm = 0.4707, lr_0 = 2.4980e-04
Loss = 1.1529e-02, PNorm = 181.5761, GNorm = 0.2970, lr_0 = 2.4963e-04
Loss = 1.0551e-02, PNorm = 181.5808, GNorm = 0.0677, lr_0 = 2.4946e-04
Loss = 8.7600e-03, PNorm = 181.5880, GNorm = 0.2594, lr_0 = 2.4929e-04
Loss = 6.5770e-03, PNorm = 181.5946, GNorm = 0.2719, lr_0 = 2.4911e-04
Loss = 7.5273e-03, PNorm = 181.6010, GNorm = 0.2136, lr_0 = 2.4894e-04
Loss = 7.9856e-03, PNorm = 181.6117, GNorm = 0.2560, lr_0 = 2.4877e-04
Loss = 1.2400e-02, PNorm = 181.6196, GNorm = 0.8280, lr_0 = 2.4860e-04
Loss = 1.5848e-02, PNorm = 181.6249, GNorm = 0.2343, lr_0 = 2.4843e-04
Loss = 7.0960e-03, PNorm = 181.6336, GNorm = 0.2066, lr_0 = 2.4826e-04
Loss = 1.0133e-02, PNorm = 181.6430, GNorm = 0.1775, lr_0 = 2.4809e-04
Loss = 6.3697e-03, PNorm = 181.6521, GNorm = 0.1431, lr_0 = 2.4792e-04
Loss = 8.5348e-03, PNorm = 181.6608, GNorm = 0.1877, lr_0 = 2.4775e-04
Loss = 1.1996e-02, PNorm = 181.6645, GNorm = 0.3652, lr_0 = 2.4758e-04
Loss = 9.3303e-03, PNorm = 181.6715, GNorm = 0.2522, lr_0 = 2.4741e-04
Loss = 6.8860e-03, PNorm = 181.6813, GNorm = 0.2890, lr_0 = 2.4724e-04
Loss = 2.4458e-02, PNorm = 181.6911, GNorm = 0.1245, lr_0 = 2.4707e-04
Validation mae = 0.121681
Epoch 19
Loss = 6.9358e-03, PNorm = 181.6976, GNorm = 0.2409, lr_0 = 2.4690e-04
Loss = 8.2597e-03, PNorm = 181.7008, GNorm = 0.2072, lr_0 = 2.4674e-04
Loss = 5.4895e-03, PNorm = 181.7076, GNorm = 0.2372, lr_0 = 2.4657e-04
Loss = 7.9363e-03, PNorm = 181.7127, GNorm = 0.1477, lr_0 = 2.4640e-04
Loss = 8.4992e-03, PNorm = 181.7186, GNorm = 0.2405, lr_0 = 2.4623e-04
Loss = 5.0596e-03, PNorm = 181.7266, GNorm = 0.2192, lr_0 = 2.4606e-04
Loss = 7.3649e-03, PNorm = 181.7344, GNorm = 0.0896, lr_0 = 2.4589e-04
Loss = 9.4478e-03, PNorm = 181.7398, GNorm = 0.3178, lr_0 = 2.4572e-04
Loss = 7.8914e-03, PNorm = 181.7493, GNorm = 0.1479, lr_0 = 2.4556e-04
Loss = 8.7900e-03, PNorm = 181.7561, GNorm = 0.2216, lr_0 = 2.4539e-04
Loss = 5.9617e-03, PNorm = 181.7605, GNorm = 0.1473, lr_0 = 2.4522e-04
Loss = 5.1203e-03, PNorm = 181.7662, GNorm = 0.2182, lr_0 = 2.4505e-04
Loss = 1.0277e-02, PNorm = 181.7745, GNorm = 0.3978, lr_0 = 2.4488e-04
Loss = 7.1971e-03, PNorm = 181.7794, GNorm = 0.2352, lr_0 = 2.4472e-04
Loss = 5.6378e-03, PNorm = 181.7860, GNorm = 0.1746, lr_0 = 2.4455e-04
Loss = 7.0346e-03, PNorm = 181.7918, GNorm = 0.1226, lr_0 = 2.4438e-04
Loss = 5.3470e-03, PNorm = 181.8002, GNorm = 0.1321, lr_0 = 2.4421e-04
Loss = 6.2564e-03, PNorm = 181.8073, GNorm = 0.1283, lr_0 = 2.4405e-04
Loss = 7.5338e-03, PNorm = 181.8127, GNorm = 0.3456, lr_0 = 2.4388e-04
Loss = 6.2778e-03, PNorm = 181.8183, GNorm = 0.1007, lr_0 = 2.4371e-04
Loss = 5.6114e-03, PNorm = 181.8251, GNorm = 0.1591, lr_0 = 2.4354e-04
Loss = 9.4518e-03, PNorm = 181.8311, GNorm = 0.5259, lr_0 = 2.4338e-04
Loss = 8.4360e-03, PNorm = 181.8378, GNorm = 0.2036, lr_0 = 2.4321e-04
Loss = 5.5746e-03, PNorm = 181.8404, GNorm = 0.1270, lr_0 = 2.4304e-04
Loss = 7.3469e-03, PNorm = 181.8445, GNorm = 0.1301, lr_0 = 2.4288e-04
Loss = 6.1572e-03, PNorm = 181.8515, GNorm = 0.3980, lr_0 = 2.4271e-04
Loss = 6.6121e-03, PNorm = 181.8567, GNorm = 0.2786, lr_0 = 2.4254e-04
Loss = 7.1416e-03, PNorm = 181.8638, GNorm = 0.1898, lr_0 = 2.4238e-04
Loss = 4.6391e-03, PNorm = 181.8715, GNorm = 0.2229, lr_0 = 2.4221e-04
Loss = 7.1123e-03, PNorm = 181.8762, GNorm = 0.2161, lr_0 = 2.4205e-04
Loss = 5.4570e-03, PNorm = 181.8815, GNorm = 0.1692, lr_0 = 2.4188e-04
Loss = 6.6188e-03, PNorm = 181.8899, GNorm = 0.1590, lr_0 = 2.4171e-04
Loss = 7.0454e-03, PNorm = 181.9002, GNorm = 0.2037, lr_0 = 2.4155e-04
Loss = 5.0586e-03, PNorm = 181.9054, GNorm = 0.3947, lr_0 = 2.4138e-04
Loss = 6.4273e-03, PNorm = 181.9100, GNorm = 0.3094, lr_0 = 2.4122e-04
Loss = 6.3137e-03, PNorm = 181.9183, GNorm = 0.1189, lr_0 = 2.4105e-04
Loss = 6.2611e-03, PNorm = 181.9269, GNorm = 0.1983, lr_0 = 2.4089e-04
Loss = 6.9326e-03, PNorm = 181.9328, GNorm = 0.3993, lr_0 = 2.4072e-04
Loss = 4.8374e-03, PNorm = 181.9393, GNorm = 0.1837, lr_0 = 2.4056e-04
Loss = 7.6565e-03, PNorm = 181.9444, GNorm = 0.1312, lr_0 = 2.4039e-04
Loss = 9.1097e-03, PNorm = 181.9505, GNorm = 0.1473, lr_0 = 2.4023e-04
Loss = 1.0525e-02, PNorm = 181.9564, GNorm = 0.2405, lr_0 = 2.4006e-04
Loss = 5.0733e-03, PNorm = 181.9627, GNorm = 0.6230, lr_0 = 2.3990e-04
Loss = 6.9435e-03, PNorm = 181.9674, GNorm = 0.1800, lr_0 = 2.3974e-04
Loss = 6.5197e-03, PNorm = 181.9722, GNorm = 0.1973, lr_0 = 2.3957e-04
Loss = 6.5293e-03, PNorm = 181.9772, GNorm = 0.1261, lr_0 = 2.3941e-04
Loss = 4.6278e-03, PNorm = 181.9819, GNorm = 0.1337, lr_0 = 2.3924e-04
Loss = 4.7199e-03, PNorm = 181.9891, GNorm = 0.2231, lr_0 = 2.3908e-04
Loss = 8.2802e-03, PNorm = 181.9918, GNorm = 0.1539, lr_0 = 2.3892e-04
Loss = 6.7997e-03, PNorm = 181.9953, GNorm = 0.1467, lr_0 = 2.3875e-04
Loss = 5.2111e-03, PNorm = 182.0036, GNorm = 0.2078, lr_0 = 2.3859e-04
Loss = 1.6409e-02, PNorm = 182.0124, GNorm = 0.1136, lr_0 = 2.3842e-04
Loss = 9.9206e-03, PNorm = 182.0149, GNorm = 0.1633, lr_0 = 2.3826e-04
Loss = 5.5232e-03, PNorm = 182.0183, GNorm = 0.1015, lr_0 = 2.3810e-04
Loss = 4.9470e-03, PNorm = 182.0243, GNorm = 0.1028, lr_0 = 2.3794e-04
Loss = 1.0152e-02, PNorm = 182.0298, GNorm = 0.1579, lr_0 = 2.3777e-04
Loss = 1.0364e-02, PNorm = 182.0362, GNorm = 0.6037, lr_0 = 2.3761e-04
Loss = 4.8548e-03, PNorm = 182.0435, GNorm = 0.0912, lr_0 = 2.3745e-04
Loss = 1.1783e-02, PNorm = 182.0508, GNorm = 0.0881, lr_0 = 2.3728e-04
Loss = 9.0619e-03, PNorm = 182.0585, GNorm = 0.2889, lr_0 = 2.3712e-04
Loss = 1.0633e-02, PNorm = 182.0645, GNorm = 0.2624, lr_0 = 2.3696e-04
Loss = 6.0001e-03, PNorm = 182.0713, GNorm = 0.5412, lr_0 = 2.3680e-04
Loss = 4.5986e-03, PNorm = 182.0750, GNorm = 0.1378, lr_0 = 2.3663e-04
Loss = 5.3251e-03, PNorm = 182.0806, GNorm = 0.1545, lr_0 = 2.3647e-04
Loss = 7.2254e-03, PNorm = 182.0846, GNorm = 0.1619, lr_0 = 2.3631e-04
Loss = 7.5933e-03, PNorm = 182.0912, GNorm = 0.2379, lr_0 = 2.3615e-04
Loss = 7.6978e-03, PNorm = 182.0966, GNorm = 0.1617, lr_0 = 2.3599e-04
Loss = 4.7514e-03, PNorm = 182.1048, GNorm = 0.2986, lr_0 = 2.3582e-04
Loss = 5.6803e-03, PNorm = 182.1110, GNorm = 0.3426, lr_0 = 2.3566e-04
Loss = 8.4959e-03, PNorm = 182.1168, GNorm = 0.2310, lr_0 = 2.3550e-04
Loss = 6.3399e-03, PNorm = 182.1230, GNorm = 0.1779, lr_0 = 2.3534e-04
Loss = 5.9783e-03, PNorm = 182.1275, GNorm = 0.3127, lr_0 = 2.3518e-04
Loss = 6.7904e-03, PNorm = 182.1330, GNorm = 0.1228, lr_0 = 2.3502e-04
Loss = 5.9301e-03, PNorm = 182.1403, GNorm = 0.2254, lr_0 = 2.3486e-04
Loss = 7.1513e-03, PNorm = 182.1455, GNorm = 0.2884, lr_0 = 2.3470e-04
Loss = 5.6520e-03, PNorm = 182.1511, GNorm = 0.2424, lr_0 = 2.3454e-04
Loss = 6.4639e-03, PNorm = 182.1577, GNorm = 0.1343, lr_0 = 2.3437e-04
Loss = 5.3719e-03, PNorm = 182.1651, GNorm = 0.1695, lr_0 = 2.3421e-04
Loss = 6.0997e-03, PNorm = 182.1743, GNorm = 0.1032, lr_0 = 2.3405e-04
Loss = 5.0842e-03, PNorm = 182.1814, GNorm = 0.2429, lr_0 = 2.3389e-04
Loss = 1.1788e-02, PNorm = 182.1887, GNorm = 0.1021, lr_0 = 2.3373e-04
Loss = 5.9807e-03, PNorm = 182.1945, GNorm = 0.0615, lr_0 = 2.3357e-04
Loss = 8.0288e-03, PNorm = 182.2004, GNorm = 0.2743, lr_0 = 2.3341e-04
Loss = 4.3895e-03, PNorm = 182.2057, GNorm = 0.1311, lr_0 = 2.3325e-04
Loss = 6.9123e-03, PNorm = 182.2129, GNorm = 0.1011, lr_0 = 2.3309e-04
Loss = 5.2349e-03, PNorm = 182.2189, GNorm = 0.2503, lr_0 = 2.3293e-04
Loss = 4.2488e-03, PNorm = 182.2257, GNorm = 0.2992, lr_0 = 2.3277e-04
Loss = 8.1160e-03, PNorm = 182.2316, GNorm = 0.1111, lr_0 = 2.3261e-04
Loss = 1.3617e-02, PNorm = 182.2358, GNorm = 0.1536, lr_0 = 2.3246e-04
Loss = 5.1966e-03, PNorm = 182.2430, GNorm = 0.1966, lr_0 = 2.3230e-04
Loss = 3.9943e-03, PNorm = 182.2493, GNorm = 0.1728, lr_0 = 2.3214e-04
Loss = 4.6215e-03, PNorm = 182.2556, GNorm = 0.1277, lr_0 = 2.3198e-04
Loss = 1.1949e-02, PNorm = 182.2638, GNorm = 0.2111, lr_0 = 2.3182e-04
Loss = 5.3911e-03, PNorm = 182.2694, GNorm = 0.1303, lr_0 = 2.3166e-04
Loss = 5.4055e-03, PNorm = 182.2749, GNorm = 0.2197, lr_0 = 2.3150e-04
Loss = 1.0684e-02, PNorm = 182.2820, GNorm = 0.1753, lr_0 = 2.3134e-04
Loss = 7.9445e-03, PNorm = 182.2923, GNorm = 0.4291, lr_0 = 2.3118e-04
Loss = 5.2407e-03, PNorm = 182.3007, GNorm = 0.0912, lr_0 = 2.3103e-04
Loss = 5.7471e-03, PNorm = 182.3062, GNorm = 0.0923, lr_0 = 2.3087e-04
Loss = 9.4180e-03, PNorm = 182.3125, GNorm = 0.1779, lr_0 = 2.3071e-04
Loss = 5.9301e-03, PNorm = 182.3193, GNorm = 0.0970, lr_0 = 2.3055e-04
Loss = 8.5365e-03, PNorm = 182.3273, GNorm = 0.1622, lr_0 = 2.3039e-04
Loss = 1.1944e-02, PNorm = 182.3326, GNorm = 0.3385, lr_0 = 2.3024e-04
Loss = 5.5203e-03, PNorm = 182.3381, GNorm = 0.2140, lr_0 = 2.3008e-04
Loss = 1.3535e-02, PNorm = 182.3450, GNorm = 0.2485, lr_0 = 2.2992e-04
Loss = 6.3546e-03, PNorm = 182.3515, GNorm = 0.0869, lr_0 = 2.2976e-04
Loss = 5.0630e-03, PNorm = 182.3587, GNorm = 0.2889, lr_0 = 2.2961e-04
Loss = 9.8416e-03, PNorm = 182.3651, GNorm = 0.1502, lr_0 = 2.2945e-04
Loss = 7.8326e-03, PNorm = 182.3701, GNorm = 0.2326, lr_0 = 2.2929e-04
Loss = 1.1707e-02, PNorm = 182.3755, GNorm = 0.5560, lr_0 = 2.2913e-04
Loss = 4.7479e-03, PNorm = 182.3834, GNorm = 0.1743, lr_0 = 2.2898e-04
Loss = 6.4581e-03, PNorm = 182.3922, GNorm = 0.1957, lr_0 = 2.2882e-04
Loss = 7.1419e-03, PNorm = 182.4010, GNorm = 0.1461, lr_0 = 2.2866e-04
Loss = 6.4210e-03, PNorm = 182.4073, GNorm = 0.2278, lr_0 = 2.2851e-04
Loss = 1.0537e-02, PNorm = 182.4122, GNorm = 0.1744, lr_0 = 2.2835e-04
Loss = 1.0379e-02, PNorm = 182.4172, GNorm = 0.1958, lr_0 = 2.2819e-04
Loss = 6.5701e-03, PNorm = 182.4238, GNorm = 0.3238, lr_0 = 2.2804e-04
Loss = 5.7188e-03, PNorm = 182.4297, GNorm = 0.1066, lr_0 = 2.2788e-04
Loss = 7.7358e-03, PNorm = 182.4374, GNorm = 0.1501, lr_0 = 2.2773e-04
Loss = 6.1516e-03, PNorm = 182.4448, GNorm = 0.1761, lr_0 = 2.2757e-04
Validation mae = 0.120923
Epoch 20
Loss = 5.4784e-03, PNorm = 182.4507, GNorm = 0.1556, lr_0 = 2.2741e-04
Loss = 9.2418e-03, PNorm = 182.4590, GNorm = 0.1723, lr_0 = 2.2726e-04
Loss = 5.4684e-03, PNorm = 182.4657, GNorm = 0.3643, lr_0 = 2.2710e-04
Loss = 3.8964e-03, PNorm = 182.4719, GNorm = 0.1707, lr_0 = 2.2695e-04
Loss = 4.9306e-03, PNorm = 182.4771, GNorm = 0.2082, lr_0 = 2.2679e-04
Loss = 4.6538e-03, PNorm = 182.4823, GNorm = 0.2016, lr_0 = 2.2664e-04
Loss = 4.7189e-03, PNorm = 182.4852, GNorm = 0.1080, lr_0 = 2.2648e-04
Loss = 6.9754e-03, PNorm = 182.4900, GNorm = 0.2242, lr_0 = 2.2632e-04
Loss = 5.7877e-03, PNorm = 182.4947, GNorm = 0.2312, lr_0 = 2.2617e-04
Loss = 7.1560e-03, PNorm = 182.4992, GNorm = 0.1248, lr_0 = 2.2601e-04
Loss = 7.3342e-03, PNorm = 182.5043, GNorm = 0.2068, lr_0 = 2.2586e-04
Loss = 6.0055e-03, PNorm = 182.5094, GNorm = 0.1680, lr_0 = 2.2571e-04
Loss = 7.1610e-03, PNorm = 182.5143, GNorm = 0.1841, lr_0 = 2.2555e-04
Loss = 6.9220e-03, PNorm = 182.5193, GNorm = 0.2833, lr_0 = 2.2540e-04
Loss = 7.1586e-03, PNorm = 182.5236, GNorm = 0.1233, lr_0 = 2.2524e-04
Loss = 6.6980e-03, PNorm = 182.5296, GNorm = 0.1539, lr_0 = 2.2509e-04
Loss = 5.5783e-03, PNorm = 182.5350, GNorm = 0.1384, lr_0 = 2.2493e-04
Loss = 6.4122e-03, PNorm = 182.5400, GNorm = 0.2924, lr_0 = 2.2478e-04
Loss = 4.3282e-03, PNorm = 182.5438, GNorm = 0.3121, lr_0 = 2.2463e-04
Loss = 6.9608e-03, PNorm = 182.5465, GNorm = 0.2113, lr_0 = 2.2447e-04
Loss = 5.6389e-03, PNorm = 182.5513, GNorm = 0.1157, lr_0 = 2.2432e-04
Loss = 9.4550e-03, PNorm = 182.5544, GNorm = 0.1142, lr_0 = 2.2416e-04
Loss = 6.3045e-03, PNorm = 182.5615, GNorm = 0.2129, lr_0 = 2.2401e-04
Loss = 3.8239e-03, PNorm = 182.5682, GNorm = 0.1256, lr_0 = 2.2386e-04
Loss = 9.8510e-03, PNorm = 182.5736, GNorm = 0.1763, lr_0 = 2.2370e-04
Loss = 4.2838e-03, PNorm = 182.5790, GNorm = 0.2075, lr_0 = 2.2355e-04
Loss = 7.1732e-03, PNorm = 182.5852, GNorm = 0.3435, lr_0 = 2.2340e-04
Loss = 6.6614e-03, PNorm = 182.5895, GNorm = 0.2329, lr_0 = 2.2324e-04
Loss = 4.2963e-03, PNorm = 182.5943, GNorm = 0.2900, lr_0 = 2.2309e-04
Loss = 5.9924e-03, PNorm = 182.5984, GNorm = 0.1449, lr_0 = 2.2294e-04
Loss = 7.7980e-03, PNorm = 182.6025, GNorm = 0.1359, lr_0 = 2.2279e-04
Loss = 6.1926e-03, PNorm = 182.6069, GNorm = 0.1956, lr_0 = 2.2263e-04
Loss = 5.3160e-03, PNorm = 182.6114, GNorm = 0.3269, lr_0 = 2.2248e-04
Loss = 6.2384e-03, PNorm = 182.6147, GNorm = 1.0870, lr_0 = 2.2233e-04
Loss = 5.0139e-03, PNorm = 182.6186, GNorm = 0.1870, lr_0 = 2.2218e-04
Loss = 5.4402e-03, PNorm = 182.6278, GNorm = 0.2275, lr_0 = 2.2202e-04
Loss = 5.1007e-03, PNorm = 182.6334, GNorm = 0.3673, lr_0 = 2.2187e-04
Loss = 3.6077e-03, PNorm = 182.6386, GNorm = 0.1417, lr_0 = 2.2172e-04
Loss = 9.6322e-03, PNorm = 182.6454, GNorm = 0.1689, lr_0 = 2.2157e-04
Loss = 4.1515e-03, PNorm = 182.6504, GNorm = 0.1551, lr_0 = 2.2142e-04
Loss = 6.6053e-03, PNorm = 182.6563, GNorm = 0.2519, lr_0 = 2.2126e-04
Loss = 4.2151e-03, PNorm = 182.6620, GNorm = 0.1297, lr_0 = 2.2111e-04
Loss = 5.0959e-03, PNorm = 182.6650, GNorm = 0.1749, lr_0 = 2.2096e-04
Loss = 5.9128e-03, PNorm = 182.6705, GNorm = 0.3516, lr_0 = 2.2081e-04
Loss = 5.0050e-03, PNorm = 182.6776, GNorm = 0.1170, lr_0 = 2.2066e-04
Loss = 4.1279e-03, PNorm = 182.6838, GNorm = 0.1719, lr_0 = 2.2051e-04
Loss = 4.5354e-03, PNorm = 182.6910, GNorm = 0.1882, lr_0 = 2.2036e-04
Loss = 4.4734e-03, PNorm = 182.6990, GNorm = 0.1452, lr_0 = 2.2021e-04
Loss = 4.5212e-03, PNorm = 182.7045, GNorm = 0.1594, lr_0 = 2.2005e-04
Loss = 1.7896e-02, PNorm = 182.7117, GNorm = 2.4888, lr_0 = 2.1990e-04
Loss = 6.1566e-03, PNorm = 182.7175, GNorm = 0.2706, lr_0 = 2.1975e-04
Loss = 1.0040e-02, PNorm = 182.7202, GNorm = 0.3187, lr_0 = 2.1960e-04
Loss = 4.0165e-03, PNorm = 182.7259, GNorm = 0.1929, lr_0 = 2.1945e-04
Loss = 4.4556e-03, PNorm = 182.7326, GNorm = 0.2738, lr_0 = 2.1930e-04
Loss = 4.3784e-03, PNorm = 182.7387, GNorm = 0.1681, lr_0 = 2.1915e-04
Loss = 6.7591e-03, PNorm = 182.7444, GNorm = 0.1632, lr_0 = 2.1900e-04
Loss = 1.2168e-02, PNorm = 182.7475, GNorm = 0.0907, lr_0 = 2.1885e-04
Loss = 4.0746e-03, PNorm = 182.7530, GNorm = 0.1382, lr_0 = 2.1870e-04
Loss = 4.3914e-03, PNorm = 182.7589, GNorm = 0.1806, lr_0 = 2.1855e-04
Loss = 5.0925e-03, PNorm = 182.7643, GNorm = 0.1657, lr_0 = 2.1840e-04
Loss = 6.7113e-03, PNorm = 182.7665, GNorm = 0.0801, lr_0 = 2.1825e-04
Loss = 3.5763e-03, PNorm = 182.7726, GNorm = 0.2866, lr_0 = 2.1810e-04
Loss = 1.1903e-02, PNorm = 182.7759, GNorm = 0.2718, lr_0 = 2.1795e-04
Loss = 3.6539e-03, PNorm = 182.7808, GNorm = 0.1937, lr_0 = 2.1780e-04
Loss = 6.6942e-03, PNorm = 182.7882, GNorm = 0.1122, lr_0 = 2.1765e-04
Loss = 5.5659e-03, PNorm = 182.7960, GNorm = 0.1566, lr_0 = 2.1751e-04
Loss = 8.2521e-03, PNorm = 182.8033, GNorm = 0.3694, lr_0 = 2.1736e-04
Loss = 7.5429e-03, PNorm = 182.8086, GNorm = 0.3288, lr_0 = 2.1721e-04
Loss = 4.6014e-03, PNorm = 182.8138, GNorm = 0.1174, lr_0 = 2.1706e-04
Loss = 7.6051e-03, PNorm = 182.8227, GNorm = 0.1876, lr_0 = 2.1691e-04
Loss = 5.7070e-03, PNorm = 182.8293, GNorm = 0.0996, lr_0 = 2.1676e-04
Loss = 7.0313e-03, PNorm = 182.8357, GNorm = 0.1239, lr_0 = 2.1661e-04
Loss = 7.1017e-03, PNorm = 182.8419, GNorm = 0.5228, lr_0 = 2.1646e-04
Loss = 4.9629e-03, PNorm = 182.8472, GNorm = 0.1336, lr_0 = 2.1632e-04
Loss = 5.7640e-03, PNorm = 182.8505, GNorm = 0.2709, lr_0 = 2.1617e-04
Loss = 4.7665e-03, PNorm = 182.8537, GNorm = 0.1047, lr_0 = 2.1602e-04
Loss = 7.8134e-03, PNorm = 182.8584, GNorm = 0.1870, lr_0 = 2.1587e-04
Loss = 6.1356e-03, PNorm = 182.8632, GNorm = 0.8199, lr_0 = 2.1572e-04
Loss = 4.4375e-03, PNorm = 182.8683, GNorm = 0.1393, lr_0 = 2.1558e-04
Loss = 4.6870e-03, PNorm = 182.8755, GNorm = 0.1500, lr_0 = 2.1543e-04
Loss = 1.0300e-02, PNorm = 182.8822, GNorm = 0.1080, lr_0 = 2.1528e-04
Loss = 4.2621e-03, PNorm = 182.8875, GNorm = 0.2159, lr_0 = 2.1513e-04
Loss = 8.3776e-03, PNorm = 182.8925, GNorm = 0.1741, lr_0 = 2.1499e-04
Loss = 7.5243e-03, PNorm = 182.8973, GNorm = 0.1587, lr_0 = 2.1484e-04
Loss = 7.2603e-03, PNorm = 182.9023, GNorm = 0.2435, lr_0 = 2.1469e-04
Loss = 5.7010e-03, PNorm = 182.9070, GNorm = 0.2179, lr_0 = 2.1454e-04
Loss = 7.3267e-03, PNorm = 182.9121, GNorm = 0.0865, lr_0 = 2.1440e-04
Loss = 6.1671e-03, PNorm = 182.9172, GNorm = 0.1301, lr_0 = 2.1425e-04
Loss = 5.2406e-03, PNorm = 182.9233, GNorm = 0.1615, lr_0 = 2.1410e-04
Loss = 5.8642e-03, PNorm = 182.9298, GNorm = 0.1510, lr_0 = 2.1396e-04
Loss = 5.4796e-03, PNorm = 182.9346, GNorm = 0.1863, lr_0 = 2.1381e-04
Loss = 6.7237e-03, PNorm = 182.9397, GNorm = 0.1298, lr_0 = 2.1366e-04
Loss = 4.3062e-03, PNorm = 182.9473, GNorm = 0.2521, lr_0 = 2.1352e-04
Loss = 1.0862e-02, PNorm = 182.9522, GNorm = 0.4405, lr_0 = 2.1337e-04
Loss = 9.8708e-03, PNorm = 182.9561, GNorm = 0.1530, lr_0 = 2.1323e-04
Loss = 4.4062e-03, PNorm = 182.9597, GNorm = 0.1021, lr_0 = 2.1308e-04
Loss = 4.2242e-03, PNorm = 182.9654, GNorm = 0.1004, lr_0 = 2.1293e-04
Loss = 4.3217e-03, PNorm = 182.9712, GNorm = 0.2089, lr_0 = 2.1279e-04
Loss = 5.5987e-03, PNorm = 182.9774, GNorm = 0.2611, lr_0 = 2.1264e-04
Loss = 7.4330e-03, PNorm = 182.9833, GNorm = 0.7780, lr_0 = 2.1250e-04
Loss = 6.1150e-03, PNorm = 182.9897, GNorm = 0.2650, lr_0 = 2.1235e-04
Loss = 5.5560e-03, PNorm = 182.9957, GNorm = 0.0848, lr_0 = 2.1221e-04
Loss = 1.0954e-02, PNorm = 183.0006, GNorm = 0.1889, lr_0 = 2.1206e-04
Loss = 1.1195e-02, PNorm = 183.0066, GNorm = 0.1312, lr_0 = 2.1191e-04
Loss = 4.6770e-03, PNorm = 183.0112, GNorm = 0.1611, lr_0 = 2.1177e-04
Loss = 9.6705e-03, PNorm = 183.0176, GNorm = 0.3421, lr_0 = 2.1162e-04
Loss = 5.6122e-03, PNorm = 183.0219, GNorm = 0.2418, lr_0 = 2.1148e-04
Loss = 7.6317e-03, PNorm = 183.0284, GNorm = 0.1352, lr_0 = 2.1133e-04
Loss = 1.2497e-02, PNorm = 183.0347, GNorm = 0.8171, lr_0 = 2.1119e-04
Loss = 7.1185e-03, PNorm = 183.0413, GNorm = 0.1244, lr_0 = 2.1104e-04
Loss = 6.9759e-03, PNorm = 183.0480, GNorm = 0.1231, lr_0 = 2.1090e-04
Loss = 6.2304e-03, PNorm = 183.0532, GNorm = 0.1915, lr_0 = 2.1076e-04
Loss = 6.1331e-03, PNorm = 183.0581, GNorm = 0.1670, lr_0 = 2.1061e-04
Loss = 8.7201e-03, PNorm = 183.0629, GNorm = 0.2016, lr_0 = 2.1047e-04
Loss = 5.3435e-03, PNorm = 183.0706, GNorm = 0.1119, lr_0 = 2.1032e-04
Loss = 9.8404e-03, PNorm = 183.0757, GNorm = 0.1433, lr_0 = 2.1018e-04
Loss = 1.0740e-02, PNorm = 183.0811, GNorm = 0.2494, lr_0 = 2.1003e-04
Loss = 5.0185e-03, PNorm = 183.0887, GNorm = 0.1943, lr_0 = 2.0989e-04
Loss = 5.5635e-03, PNorm = 183.0938, GNorm = 0.2085, lr_0 = 2.0975e-04
Loss = 5.5713e-03, PNorm = 183.0979, GNorm = 0.1129, lr_0 = 2.0960e-04
Validation mae = 0.120956
Epoch 21
Loss = 5.7721e-03, PNorm = 183.1026, GNorm = 0.5663, lr_0 = 2.0946e-04
Loss = 5.5097e-03, PNorm = 183.1083, GNorm = 0.1884, lr_0 = 2.0932e-04
Loss = 4.6323e-03, PNorm = 183.1139, GNorm = 0.2465, lr_0 = 2.0917e-04
Loss = 6.8715e-03, PNorm = 183.1186, GNorm = 0.1430, lr_0 = 2.0903e-04
Loss = 5.3309e-03, PNorm = 183.1232, GNorm = 0.0683, lr_0 = 2.0889e-04
Loss = 5.6705e-03, PNorm = 183.1282, GNorm = 0.1560, lr_0 = 2.0874e-04
Loss = 4.7906e-03, PNorm = 183.1321, GNorm = 0.0802, lr_0 = 2.0860e-04
Loss = 3.9899e-03, PNorm = 183.1359, GNorm = 0.1644, lr_0 = 2.0846e-04
Loss = 8.2325e-03, PNorm = 183.1378, GNorm = 0.2160, lr_0 = 2.0831e-04
Loss = 3.0468e-03, PNorm = 183.1412, GNorm = 0.1150, lr_0 = 2.0817e-04
Loss = 3.9216e-03, PNorm = 183.1463, GNorm = 0.1651, lr_0 = 2.0803e-04
Loss = 4.4356e-03, PNorm = 183.1509, GNorm = 0.2059, lr_0 = 2.0789e-04
Loss = 4.1986e-03, PNorm = 183.1548, GNorm = 0.1895, lr_0 = 2.0774e-04
Loss = 3.5926e-03, PNorm = 183.1573, GNorm = 0.1828, lr_0 = 2.0760e-04
Loss = 3.1584e-03, PNorm = 183.1603, GNorm = 0.1099, lr_0 = 2.0746e-04
Loss = 4.2459e-03, PNorm = 183.1639, GNorm = 0.1510, lr_0 = 2.0732e-04
Loss = 3.4343e-03, PNorm = 183.1700, GNorm = 0.2087, lr_0 = 2.0718e-04
Loss = 4.7229e-03, PNorm = 183.1745, GNorm = 0.1178, lr_0 = 2.0703e-04
Loss = 3.5419e-03, PNorm = 183.1780, GNorm = 0.1943, lr_0 = 2.0689e-04
Loss = 5.8195e-03, PNorm = 183.1817, GNorm = 0.2244, lr_0 = 2.0675e-04
Loss = 6.0345e-03, PNorm = 183.1877, GNorm = 0.0879, lr_0 = 2.0661e-04
Loss = 8.5679e-03, PNorm = 183.1926, GNorm = 0.1040, lr_0 = 2.0647e-04
Loss = 4.2133e-03, PNorm = 183.1956, GNorm = 0.2502, lr_0 = 2.0633e-04
Loss = 4.7147e-03, PNorm = 183.2005, GNorm = 0.1252, lr_0 = 2.0618e-04
Loss = 5.6945e-03, PNorm = 183.2032, GNorm = 0.2501, lr_0 = 2.0604e-04
Loss = 4.5620e-03, PNorm = 183.2056, GNorm = 0.1393, lr_0 = 2.0590e-04
Loss = 5.4348e-03, PNorm = 183.2100, GNorm = 0.2881, lr_0 = 2.0576e-04
Loss = 3.4777e-03, PNorm = 183.2135, GNorm = 0.0853, lr_0 = 2.0562e-04
Loss = 3.9085e-03, PNorm = 183.2161, GNorm = 0.2151, lr_0 = 2.0548e-04
Loss = 3.2054e-03, PNorm = 183.2200, GNorm = 0.1218, lr_0 = 2.0534e-04
Loss = 6.8285e-03, PNorm = 183.2241, GNorm = 0.1111, lr_0 = 2.0520e-04
Loss = 4.9650e-03, PNorm = 183.2275, GNorm = 0.1147, lr_0 = 2.0506e-04
Loss = 5.8719e-03, PNorm = 183.2324, GNorm = 0.0892, lr_0 = 2.0492e-04
Loss = 3.1774e-03, PNorm = 183.2379, GNorm = 0.1045, lr_0 = 2.0478e-04
Loss = 4.3875e-03, PNorm = 183.2419, GNorm = 0.0818, lr_0 = 2.0464e-04
Loss = 5.4748e-03, PNorm = 183.2467, GNorm = 0.1340, lr_0 = 2.0450e-04
Loss = 5.5057e-03, PNorm = 183.2487, GNorm = 0.1137, lr_0 = 2.0436e-04
Loss = 6.3182e-03, PNorm = 183.2522, GNorm = 0.3126, lr_0 = 2.0422e-04
Loss = 3.6969e-03, PNorm = 183.2587, GNorm = 0.1729, lr_0 = 2.0408e-04
Loss = 4.6277e-03, PNorm = 183.2622, GNorm = 0.2827, lr_0 = 2.0394e-04
Loss = 4.0215e-03, PNorm = 183.2674, GNorm = 0.1698, lr_0 = 2.0380e-04
Loss = 6.2552e-03, PNorm = 183.2711, GNorm = 0.3118, lr_0 = 2.0366e-04
Loss = 9.4355e-03, PNorm = 183.2727, GNorm = 0.3843, lr_0 = 2.0352e-04
Loss = 3.7717e-03, PNorm = 183.2810, GNorm = 0.1675, lr_0 = 2.0338e-04
Loss = 8.0028e-03, PNorm = 183.2876, GNorm = 0.1322, lr_0 = 2.0324e-04
Loss = 3.9656e-03, PNorm = 183.2960, GNorm = 0.1994, lr_0 = 2.0310e-04
Loss = 4.8116e-03, PNorm = 183.3027, GNorm = 0.1420, lr_0 = 2.0296e-04
Loss = 5.8148e-03, PNorm = 183.3061, GNorm = 0.2493, lr_0 = 2.0282e-04
Loss = 3.9004e-03, PNorm = 183.3101, GNorm = 0.2452, lr_0 = 2.0268e-04
Loss = 6.3653e-03, PNorm = 183.3176, GNorm = 0.2534, lr_0 = 2.0254e-04
Loss = 5.6412e-03, PNorm = 183.3217, GNorm = 0.3212, lr_0 = 2.0240e-04
Loss = 7.0305e-03, PNorm = 183.3263, GNorm = 0.1944, lr_0 = 2.0227e-04
Loss = 1.2638e-02, PNorm = 183.3291, GNorm = 0.1667, lr_0 = 2.0213e-04
Loss = 6.0020e-03, PNorm = 183.3326, GNorm = 0.2208, lr_0 = 2.0199e-04
Loss = 5.2572e-03, PNorm = 183.3355, GNorm = 0.0957, lr_0 = 2.0185e-04
Loss = 7.0829e-03, PNorm = 183.3394, GNorm = 0.2688, lr_0 = 2.0171e-04
Loss = 5.9972e-03, PNorm = 183.3448, GNorm = 0.1000, lr_0 = 2.0157e-04
Loss = 4.6837e-03, PNorm = 183.3499, GNorm = 0.0960, lr_0 = 2.0144e-04
Loss = 5.5341e-03, PNorm = 183.3537, GNorm = 0.1913, lr_0 = 2.0130e-04
Loss = 3.0746e-03, PNorm = 183.3568, GNorm = 0.1170, lr_0 = 2.0116e-04
Loss = 6.2548e-03, PNorm = 183.3597, GNorm = 0.1164, lr_0 = 2.0102e-04
Loss = 4.1993e-03, PNorm = 183.3633, GNorm = 0.2603, lr_0 = 2.0088e-04
Loss = 3.7199e-03, PNorm = 183.3669, GNorm = 0.1091, lr_0 = 2.0075e-04
Loss = 3.8554e-03, PNorm = 183.3707, GNorm = 0.2615, lr_0 = 2.0061e-04
Loss = 4.9674e-03, PNorm = 183.3741, GNorm = 0.1159, lr_0 = 2.0047e-04
Loss = 5.2934e-03, PNorm = 183.3766, GNorm = 0.0810, lr_0 = 2.0033e-04
Loss = 3.5640e-03, PNorm = 183.3795, GNorm = 0.1424, lr_0 = 2.0020e-04
Loss = 2.9005e-03, PNorm = 183.3837, GNorm = 0.0894, lr_0 = 2.0006e-04
Loss = 3.8611e-03, PNorm = 183.3878, GNorm = 0.1953, lr_0 = 1.9992e-04
Loss = 8.0988e-03, PNorm = 183.3927, GNorm = 0.1157, lr_0 = 1.9979e-04
Loss = 6.3223e-03, PNorm = 183.3951, GNorm = 0.2112, lr_0 = 1.9965e-04
Loss = 3.2335e-03, PNorm = 183.3989, GNorm = 0.1057, lr_0 = 1.9951e-04
Loss = 8.9760e-03, PNorm = 183.4022, GNorm = 0.1627, lr_0 = 1.9938e-04
Loss = 1.6792e-02, PNorm = 183.4080, GNorm = 0.0829, lr_0 = 1.9924e-04
Loss = 6.0966e-03, PNorm = 183.4144, GNorm = 0.2007, lr_0 = 1.9910e-04
Loss = 4.0569e-03, PNorm = 183.4205, GNorm = 0.1462, lr_0 = 1.9897e-04
Loss = 6.0543e-03, PNorm = 183.4245, GNorm = 0.0788, lr_0 = 1.9883e-04
Loss = 3.6875e-03, PNorm = 183.4285, GNorm = 0.1156, lr_0 = 1.9869e-04
Loss = 1.0937e-02, PNorm = 183.4347, GNorm = 0.1033, lr_0 = 1.9856e-04
Loss = 7.0428e-03, PNorm = 183.4394, GNorm = 0.2599, lr_0 = 1.9842e-04
Loss = 6.1479e-03, PNorm = 183.4443, GNorm = 0.1101, lr_0 = 1.9829e-04
Loss = 3.8042e-03, PNorm = 183.4483, GNorm = 0.0876, lr_0 = 1.9815e-04
Loss = 6.5835e-03, PNorm = 183.4511, GNorm = 0.2612, lr_0 = 1.9801e-04
Loss = 4.1378e-03, PNorm = 183.4552, GNorm = 0.3072, lr_0 = 1.9788e-04
Loss = 4.0798e-03, PNorm = 183.4606, GNorm = 0.1193, lr_0 = 1.9774e-04
Loss = 4.1458e-03, PNorm = 183.4669, GNorm = 0.2176, lr_0 = 1.9761e-04
Loss = 9.5091e-03, PNorm = 183.4735, GNorm = 0.2239, lr_0 = 1.9747e-04
Loss = 4.1072e-03, PNorm = 183.4806, GNorm = 0.2921, lr_0 = 1.9734e-04
Loss = 4.0105e-03, PNorm = 183.4871, GNorm = 0.1843, lr_0 = 1.9720e-04
Loss = 3.9970e-03, PNorm = 183.4925, GNorm = 0.1250, lr_0 = 1.9707e-04
Loss = 3.2143e-03, PNorm = 183.4986, GNorm = 0.1705, lr_0 = 1.9693e-04
Loss = 7.1135e-03, PNorm = 183.5027, GNorm = 0.1236, lr_0 = 1.9680e-04
Loss = 4.8769e-03, PNorm = 183.5062, GNorm = 0.1727, lr_0 = 1.9666e-04
Loss = 9.2145e-03, PNorm = 183.5113, GNorm = 0.1882, lr_0 = 1.9653e-04
Loss = 6.6327e-03, PNorm = 183.5150, GNorm = 0.1372, lr_0 = 1.9639e-04
Loss = 1.0008e-02, PNorm = 183.5209, GNorm = 0.1483, lr_0 = 1.9626e-04
Loss = 4.5329e-03, PNorm = 183.5255, GNorm = 0.2606, lr_0 = 1.9612e-04
Loss = 3.9105e-03, PNorm = 183.5293, GNorm = 0.2202, lr_0 = 1.9599e-04
Loss = 4.4694e-03, PNorm = 183.5340, GNorm = 0.2310, lr_0 = 1.9585e-04
Loss = 8.4041e-03, PNorm = 183.5401, GNorm = 0.1115, lr_0 = 1.9572e-04
Loss = 5.0077e-03, PNorm = 183.5443, GNorm = 0.2729, lr_0 = 1.9559e-04
Loss = 4.6815e-03, PNorm = 183.5477, GNorm = 0.1296, lr_0 = 1.9545e-04
Loss = 1.5949e-02, PNorm = 183.5522, GNorm = 0.3056, lr_0 = 1.9532e-04
Loss = 6.5438e-03, PNorm = 183.5561, GNorm = 0.1964, lr_0 = 1.9518e-04
Loss = 7.3065e-03, PNorm = 183.5623, GNorm = 0.1491, lr_0 = 1.9505e-04
Loss = 1.3071e-02, PNorm = 183.5656, GNorm = 0.1379, lr_0 = 1.9492e-04
Loss = 4.7125e-03, PNorm = 183.5686, GNorm = 0.2268, lr_0 = 1.9478e-04
Loss = 3.1550e-03, PNorm = 183.5733, GNorm = 0.2174, lr_0 = 1.9465e-04
Loss = 4.6717e-03, PNorm = 183.5782, GNorm = 0.1764, lr_0 = 1.9452e-04
Loss = 3.7101e-03, PNorm = 183.5834, GNorm = 0.1880, lr_0 = 1.9438e-04
Loss = 9.9458e-03, PNorm = 183.5879, GNorm = 0.3952, lr_0 = 1.9425e-04
Loss = 3.7788e-03, PNorm = 183.5946, GNorm = 0.1360, lr_0 = 1.9412e-04
Loss = 4.4087e-03, PNorm = 183.5995, GNorm = 0.2673, lr_0 = 1.9398e-04
Loss = 4.6177e-03, PNorm = 183.6037, GNorm = 0.2479, lr_0 = 1.9385e-04
Loss = 6.3755e-03, PNorm = 183.6064, GNorm = 0.3216, lr_0 = 1.9372e-04
Loss = 9.7355e-03, PNorm = 183.6087, GNorm = 0.1993, lr_0 = 1.9359e-04
Loss = 4.9819e-03, PNorm = 183.6153, GNorm = 0.5205, lr_0 = 1.9345e-04
Loss = 1.1331e-02, PNorm = 183.6185, GNorm = 0.2550, lr_0 = 1.9332e-04
Loss = 3.9916e-03, PNorm = 183.6224, GNorm = 0.1003, lr_0 = 1.9319e-04
Loss = 8.3718e-03, PNorm = 183.6266, GNorm = 0.1213, lr_0 = 1.9306e-04
Validation mae = 0.120751
Epoch 22
Loss = 2.9291e-03, PNorm = 183.6310, GNorm = 0.2868, lr_0 = 1.9292e-04
Loss = 4.1205e-03, PNorm = 183.6323, GNorm = 0.1420, lr_0 = 1.9279e-04
Loss = 6.3822e-03, PNorm = 183.6351, GNorm = 0.0742, lr_0 = 1.9266e-04
Loss = 3.4577e-03, PNorm = 183.6374, GNorm = 0.0608, lr_0 = 1.9253e-04
Loss = 4.7472e-03, PNorm = 183.6419, GNorm = 0.2026, lr_0 = 1.9240e-04
Loss = 7.9138e-03, PNorm = 183.6468, GNorm = 1.4267, lr_0 = 1.9226e-04
Loss = 3.3709e-03, PNorm = 183.6510, GNorm = 0.1565, lr_0 = 1.9213e-04
Loss = 3.2546e-03, PNorm = 183.6559, GNorm = 0.1814, lr_0 = 1.9200e-04
Loss = 7.4970e-03, PNorm = 183.6602, GNorm = 0.2881, lr_0 = 1.9187e-04
Loss = 4.6019e-03, PNorm = 183.6648, GNorm = 0.5818, lr_0 = 1.9174e-04
Loss = 2.8230e-03, PNorm = 183.6702, GNorm = 0.3734, lr_0 = 1.9161e-04
Loss = 3.4142e-03, PNorm = 183.6755, GNorm = 0.1509, lr_0 = 1.9148e-04
Loss = 3.4036e-03, PNorm = 183.6768, GNorm = 0.1019, lr_0 = 1.9134e-04
Loss = 4.4270e-03, PNorm = 183.6790, GNorm = 0.2458, lr_0 = 1.9121e-04
Loss = 6.8931e-03, PNorm = 183.6835, GNorm = 0.2167, lr_0 = 1.9108e-04
Loss = 3.2301e-03, PNorm = 183.6889, GNorm = 0.1479, lr_0 = 1.9095e-04
Loss = 1.0734e-02, PNorm = 183.6926, GNorm = 0.2819, lr_0 = 1.9082e-04
Loss = 4.0479e-03, PNorm = 183.6978, GNorm = 0.1309, lr_0 = 1.9069e-04
Loss = 3.6751e-03, PNorm = 183.7018, GNorm = 0.1121, lr_0 = 1.9056e-04
Loss = 1.1842e-02, PNorm = 183.7051, GNorm = 0.4983, lr_0 = 1.9043e-04
Loss = 2.4181e-03, PNorm = 183.7106, GNorm = 0.0734, lr_0 = 1.9030e-04
Loss = 4.1058e-03, PNorm = 183.7143, GNorm = 0.0933, lr_0 = 1.9017e-04
Loss = 7.0156e-03, PNorm = 183.7191, GNorm = 0.4536, lr_0 = 1.9004e-04
Loss = 5.2240e-03, PNorm = 183.7226, GNorm = 0.1520, lr_0 = 1.8991e-04
Loss = 4.8528e-03, PNorm = 183.7236, GNorm = 0.2594, lr_0 = 1.8978e-04
Loss = 4.1635e-03, PNorm = 183.7254, GNorm = 0.3299, lr_0 = 1.8965e-04
Loss = 4.8357e-03, PNorm = 183.7274, GNorm = 0.2706, lr_0 = 1.8952e-04
Loss = 6.3255e-03, PNorm = 183.7320, GNorm = 0.1900, lr_0 = 1.8939e-04
Loss = 4.5792e-03, PNorm = 183.7358, GNorm = 0.1336, lr_0 = 1.8926e-04
Loss = 1.0248e-02, PNorm = 183.7396, GNorm = 0.4604, lr_0 = 1.8913e-04
Loss = 5.5313e-03, PNorm = 183.7427, GNorm = 0.1672, lr_0 = 1.8900e-04
Loss = 3.3858e-03, PNorm = 183.7448, GNorm = 0.1758, lr_0 = 1.8887e-04
Loss = 5.8325e-03, PNorm = 183.7487, GNorm = 0.1063, lr_0 = 1.8874e-04
Loss = 4.5126e-03, PNorm = 183.7522, GNorm = 0.0646, lr_0 = 1.8861e-04
Loss = 5.7438e-03, PNorm = 183.7567, GNorm = 0.0782, lr_0 = 1.8848e-04
Loss = 6.2160e-03, PNorm = 183.7594, GNorm = 0.0705, lr_0 = 1.8835e-04
Loss = 4.4259e-03, PNorm = 183.7627, GNorm = 0.2016, lr_0 = 1.8822e-04
Loss = 3.0578e-03, PNorm = 183.7655, GNorm = 0.2301, lr_0 = 1.8809e-04
Loss = 5.4606e-03, PNorm = 183.7709, GNorm = 0.2539, lr_0 = 1.8797e-04
Loss = 3.1513e-03, PNorm = 183.7763, GNorm = 0.0888, lr_0 = 1.8784e-04
Loss = 3.9388e-03, PNorm = 183.7803, GNorm = 0.2105, lr_0 = 1.8771e-04
Loss = 7.0083e-03, PNorm = 183.7834, GNorm = 0.4207, lr_0 = 1.8758e-04
Loss = 5.5043e-03, PNorm = 183.7861, GNorm = 0.0883, lr_0 = 1.8745e-04
Loss = 2.8125e-03, PNorm = 183.7895, GNorm = 0.1110, lr_0 = 1.8732e-04
Loss = 4.7468e-03, PNorm = 183.7932, GNorm = 0.1985, lr_0 = 1.8719e-04
Loss = 8.3022e-03, PNorm = 183.7968, GNorm = 0.0762, lr_0 = 1.8707e-04
Loss = 2.6561e-03, PNorm = 183.7994, GNorm = 0.0970, lr_0 = 1.8694e-04
Loss = 5.5769e-03, PNorm = 183.8015, GNorm = 0.3075, lr_0 = 1.8681e-04
Loss = 5.4557e-03, PNorm = 183.8035, GNorm = 0.4531, lr_0 = 1.8668e-04
Loss = 1.5814e-02, PNorm = 183.8082, GNorm = 0.1902, lr_0 = 1.8655e-04
Loss = 3.0656e-03, PNorm = 183.8140, GNorm = 0.1114, lr_0 = 1.8643e-04
Loss = 6.3399e-03, PNorm = 183.8176, GNorm = 0.1095, lr_0 = 1.8630e-04
Loss = 4.3795e-03, PNorm = 183.8216, GNorm = 0.2065, lr_0 = 1.8617e-04
Loss = 3.2294e-03, PNorm = 183.8270, GNorm = 0.1943, lr_0 = 1.8604e-04
Loss = 4.2525e-03, PNorm = 183.8308, GNorm = 0.0992, lr_0 = 1.8592e-04
Loss = 3.6080e-03, PNorm = 183.8328, GNorm = 0.1096, lr_0 = 1.8579e-04
Loss = 5.2407e-03, PNorm = 183.8348, GNorm = 0.1744, lr_0 = 1.8566e-04
Loss = 3.4279e-03, PNorm = 183.8376, GNorm = 0.1612, lr_0 = 1.8553e-04
Loss = 4.6837e-03, PNorm = 183.8399, GNorm = 0.0768, lr_0 = 1.8541e-04
Loss = 2.8909e-03, PNorm = 183.8424, GNorm = 0.1997, lr_0 = 1.8528e-04
Loss = 5.9690e-03, PNorm = 183.8447, GNorm = 0.1425, lr_0 = 1.8515e-04
Loss = 5.9787e-03, PNorm = 183.8483, GNorm = 0.1439, lr_0 = 1.8503e-04
Loss = 8.2841e-03, PNorm = 183.8523, GNorm = 0.0660, lr_0 = 1.8490e-04
Loss = 5.7422e-03, PNorm = 183.8568, GNorm = 0.1692, lr_0 = 1.8477e-04
Loss = 2.7557e-03, PNorm = 183.8615, GNorm = 0.0885, lr_0 = 1.8465e-04
Loss = 7.4062e-03, PNorm = 183.8683, GNorm = 0.9436, lr_0 = 1.8452e-04
Loss = 4.9137e-03, PNorm = 183.8735, GNorm = 0.1549, lr_0 = 1.8439e-04
Loss = 7.1985e-03, PNorm = 183.8778, GNorm = 0.3630, lr_0 = 1.8427e-04
Loss = 6.0878e-03, PNorm = 183.8823, GNorm = 0.2689, lr_0 = 1.8414e-04
Loss = 3.5331e-03, PNorm = 183.8868, GNorm = 0.1731, lr_0 = 1.8401e-04
Loss = 9.4457e-03, PNorm = 183.8922, GNorm = 0.1574, lr_0 = 1.8389e-04
Loss = 4.6840e-03, PNorm = 183.8958, GNorm = 0.1665, lr_0 = 1.8376e-04
Loss = 2.1816e-03, PNorm = 183.8991, GNorm = 0.1097, lr_0 = 1.8364e-04
Loss = 3.6521e-03, PNorm = 183.9025, GNorm = 0.0685, lr_0 = 1.8351e-04
Loss = 3.7425e-03, PNorm = 183.9054, GNorm = 0.1826, lr_0 = 1.8338e-04
Loss = 5.9235e-03, PNorm = 183.9084, GNorm = 0.1656, lr_0 = 1.8326e-04
Loss = 7.6511e-03, PNorm = 183.9109, GNorm = 0.1106, lr_0 = 1.8313e-04
Loss = 3.2110e-03, PNorm = 183.9147, GNorm = 0.1520, lr_0 = 1.8301e-04
Loss = 6.8753e-03, PNorm = 183.9190, GNorm = 0.1576, lr_0 = 1.8288e-04
Loss = 3.8720e-03, PNorm = 183.9247, GNorm = 0.0867, lr_0 = 1.8276e-04
Loss = 3.6715e-03, PNorm = 183.9289, GNorm = 0.0825, lr_0 = 1.8263e-04
Loss = 5.7681e-03, PNorm = 183.9334, GNorm = 0.1174, lr_0 = 1.8251e-04
Loss = 2.6673e-03, PNorm = 183.9366, GNorm = 0.1325, lr_0 = 1.8238e-04
Loss = 5.8055e-03, PNorm = 183.9386, GNorm = 0.3601, lr_0 = 1.8226e-04
Loss = 5.3033e-03, PNorm = 183.9406, GNorm = 0.1722, lr_0 = 1.8213e-04
Loss = 7.9874e-03, PNorm = 183.9431, GNorm = 0.2031, lr_0 = 1.8201e-04
Loss = 3.7864e-03, PNorm = 183.9462, GNorm = 0.1574, lr_0 = 1.8188e-04
Loss = 3.2921e-03, PNorm = 183.9500, GNorm = 0.1468, lr_0 = 1.8176e-04
Loss = 9.4837e-03, PNorm = 183.9549, GNorm = 0.1069, lr_0 = 1.8163e-04
Loss = 4.6575e-03, PNorm = 183.9583, GNorm = 0.3112, lr_0 = 1.8151e-04
Loss = 4.3245e-03, PNorm = 183.9615, GNorm = 0.1626, lr_0 = 1.8138e-04
Loss = 6.8550e-03, PNorm = 183.9651, GNorm = 0.0592, lr_0 = 1.8126e-04
Loss = 3.6802e-03, PNorm = 183.9674, GNorm = 0.3197, lr_0 = 1.8114e-04
Loss = 5.9205e-03, PNorm = 183.9701, GNorm = 0.3423, lr_0 = 1.8101e-04
Loss = 3.7655e-03, PNorm = 183.9740, GNorm = 0.1675, lr_0 = 1.8089e-04
Loss = 5.8491e-03, PNorm = 183.9784, GNorm = 0.1034, lr_0 = 1.8076e-04
Loss = 5.4213e-03, PNorm = 183.9841, GNorm = 0.1639, lr_0 = 1.8064e-04
Loss = 6.0375e-03, PNorm = 183.9891, GNorm = 0.1225, lr_0 = 1.8052e-04
Loss = 4.7445e-03, PNorm = 183.9923, GNorm = 0.1351, lr_0 = 1.8039e-04
Loss = 2.5762e-03, PNorm = 183.9961, GNorm = 0.2401, lr_0 = 1.8027e-04
Loss = 4.7153e-03, PNorm = 184.0001, GNorm = 0.2863, lr_0 = 1.8015e-04
Loss = 3.6113e-03, PNorm = 184.0045, GNorm = 0.1552, lr_0 = 1.8002e-04
Loss = 6.7283e-03, PNorm = 184.0074, GNorm = 0.1300, lr_0 = 1.7990e-04
Loss = 3.9033e-03, PNorm = 184.0127, GNorm = 0.0560, lr_0 = 1.7978e-04
Loss = 3.7346e-03, PNorm = 184.0158, GNorm = 0.1083, lr_0 = 1.7965e-04
Loss = 3.0923e-03, PNorm = 184.0173, GNorm = 0.1855, lr_0 = 1.7953e-04
Loss = 4.7053e-03, PNorm = 184.0200, GNorm = 0.0923, lr_0 = 1.7941e-04
Loss = 3.7954e-03, PNorm = 184.0233, GNorm = 0.1449, lr_0 = 1.7928e-04
Loss = 3.7770e-03, PNorm = 184.0291, GNorm = 0.1331, lr_0 = 1.7916e-04
Loss = 7.7996e-03, PNorm = 184.0349, GNorm = 0.1190, lr_0 = 1.7904e-04
Loss = 4.1313e-03, PNorm = 184.0390, GNorm = 0.1641, lr_0 = 1.7892e-04
Loss = 6.2552e-03, PNorm = 184.0434, GNorm = 0.0685, lr_0 = 1.7879e-04
Loss = 3.2708e-03, PNorm = 184.0477, GNorm = 0.1221, lr_0 = 1.7867e-04
Loss = 2.6490e-03, PNorm = 184.0520, GNorm = 0.2986, lr_0 = 1.7855e-04
Loss = 4.1633e-03, PNorm = 184.0543, GNorm = 0.1798, lr_0 = 1.7843e-04
Loss = 2.8374e-03, PNorm = 184.0581, GNorm = 0.1138, lr_0 = 1.7830e-04
Loss = 4.9779e-03, PNorm = 184.0592, GNorm = 0.1278, lr_0 = 1.7818e-04
Loss = 5.1841e-03, PNorm = 184.0623, GNorm = 0.0659, lr_0 = 1.7806e-04
Loss = 3.0756e-03, PNorm = 184.0681, GNorm = 0.0975, lr_0 = 1.7794e-04
Loss = 4.9895e-03, PNorm = 184.0721, GNorm = 0.3684, lr_0 = 1.7782e-04
Validation mae = 0.120891
Epoch 23
Loss = 4.1214e-03, PNorm = 184.0756, GNorm = 0.1415, lr_0 = 1.7769e-04
Loss = 2.6321e-03, PNorm = 184.0784, GNorm = 0.0809, lr_0 = 1.7757e-04
Loss = 2.6188e-03, PNorm = 184.0801, GNorm = 0.1778, lr_0 = 1.7745e-04
Loss = 2.5028e-03, PNorm = 184.0838, GNorm = 0.1584, lr_0 = 1.7733e-04
Loss = 3.9882e-03, PNorm = 184.0873, GNorm = 0.1454, lr_0 = 1.7721e-04
Loss = 3.2035e-03, PNorm = 184.0903, GNorm = 0.1264, lr_0 = 1.7709e-04
Loss = 5.6751e-03, PNorm = 184.0949, GNorm = 0.0830, lr_0 = 1.7696e-04
Loss = 4.3756e-03, PNorm = 184.0985, GNorm = 0.2354, lr_0 = 1.7684e-04
Loss = 3.9061e-03, PNorm = 184.1023, GNorm = 0.4405, lr_0 = 1.7672e-04
Loss = 3.5235e-03, PNorm = 184.1065, GNorm = 0.1877, lr_0 = 1.7660e-04
Loss = 2.8484e-03, PNorm = 184.1075, GNorm = 0.1522, lr_0 = 1.7648e-04
Loss = 2.6753e-03, PNorm = 184.1084, GNorm = 0.2348, lr_0 = 1.7636e-04
Loss = 5.3825e-03, PNorm = 184.1110, GNorm = 0.0649, lr_0 = 1.7624e-04
Loss = 2.6148e-03, PNorm = 184.1136, GNorm = 0.1780, lr_0 = 1.7612e-04
Loss = 3.7435e-03, PNorm = 184.1171, GNorm = 0.2664, lr_0 = 1.7600e-04
Loss = 7.2842e-03, PNorm = 184.1185, GNorm = 0.1012, lr_0 = 1.7588e-04
Loss = 5.1984e-03, PNorm = 184.1199, GNorm = 0.2026, lr_0 = 1.7576e-04
Loss = 6.2296e-03, PNorm = 184.1228, GNorm = 0.0898, lr_0 = 1.7564e-04
Loss = 7.4345e-03, PNorm = 184.1260, GNorm = 0.0415, lr_0 = 1.7552e-04
Loss = 5.8209e-03, PNorm = 184.1290, GNorm = 0.1553, lr_0 = 1.7540e-04
Loss = 6.0876e-03, PNorm = 184.1315, GNorm = 0.1273, lr_0 = 1.7528e-04
Loss = 4.4865e-03, PNorm = 184.1356, GNorm = 0.3067, lr_0 = 1.7516e-04
Loss = 4.1130e-03, PNorm = 184.1401, GNorm = 0.1839, lr_0 = 1.7504e-04
Loss = 4.4026e-03, PNorm = 184.1428, GNorm = 0.1830, lr_0 = 1.7492e-04
Loss = 5.8504e-03, PNorm = 184.1464, GNorm = 0.1070, lr_0 = 1.7480e-04
Loss = 5.0926e-03, PNorm = 184.1488, GNorm = 0.1986, lr_0 = 1.7468e-04
Loss = 4.8487e-03, PNorm = 184.1502, GNorm = 0.1152, lr_0 = 1.7456e-04
Loss = 5.0524e-03, PNorm = 184.1524, GNorm = 0.0768, lr_0 = 1.7444e-04
Loss = 1.9378e-02, PNorm = 184.1570, GNorm = 2.7230, lr_0 = 1.7432e-04
Loss = 3.6382e-03, PNorm = 184.1605, GNorm = 0.1404, lr_0 = 1.7420e-04
Loss = 2.8431e-03, PNorm = 184.1622, GNorm = 0.1528, lr_0 = 1.7408e-04
Loss = 2.3671e-03, PNorm = 184.1652, GNorm = 0.0584, lr_0 = 1.7396e-04
Loss = 3.1722e-03, PNorm = 184.1675, GNorm = 0.2273, lr_0 = 1.7384e-04
Loss = 2.6826e-03, PNorm = 184.1702, GNorm = 0.1910, lr_0 = 1.7372e-04
Loss = 4.4655e-03, PNorm = 184.1712, GNorm = 0.2218, lr_0 = 1.7360e-04
Loss = 4.4184e-03, PNorm = 184.1722, GNorm = 0.1210, lr_0 = 1.7348e-04
Loss = 2.9403e-03, PNorm = 184.1752, GNorm = 0.0810, lr_0 = 1.7336e-04
Loss = 7.1544e-03, PNorm = 184.1787, GNorm = 0.1974, lr_0 = 1.7325e-04
Loss = 3.4613e-03, PNorm = 184.1823, GNorm = 0.2147, lr_0 = 1.7313e-04
Loss = 1.0430e-02, PNorm = 184.1839, GNorm = 0.1306, lr_0 = 1.7301e-04
Loss = 3.1403e-03, PNorm = 184.1892, GNorm = 0.1421, lr_0 = 1.7289e-04
Loss = 6.4783e-03, PNorm = 184.1918, GNorm = 0.1078, lr_0 = 1.7277e-04
Loss = 2.3429e-03, PNorm = 184.1948, GNorm = 0.0703, lr_0 = 1.7265e-04
Loss = 3.9227e-03, PNorm = 184.1991, GNorm = 0.1356, lr_0 = 1.7253e-04
Loss = 2.2360e-03, PNorm = 184.2021, GNorm = 0.0810, lr_0 = 1.7242e-04
Loss = 3.2704e-03, PNorm = 184.2064, GNorm = 0.1198, lr_0 = 1.7230e-04
Loss = 2.4238e-03, PNorm = 184.2091, GNorm = 0.2397, lr_0 = 1.7218e-04
Loss = 2.7979e-03, PNorm = 184.2108, GNorm = 0.1186, lr_0 = 1.7206e-04
Loss = 5.2509e-03, PNorm = 184.2136, GNorm = 0.1708, lr_0 = 1.7194e-04
Loss = 2.2486e-03, PNorm = 184.2159, GNorm = 0.1207, lr_0 = 1.7183e-04
Loss = 4.6400e-03, PNorm = 184.2180, GNorm = 0.1508, lr_0 = 1.7171e-04
Loss = 7.8574e-03, PNorm = 184.2193, GNorm = 0.0652, lr_0 = 1.7159e-04
Loss = 6.9136e-03, PNorm = 184.2222, GNorm = 0.2532, lr_0 = 1.7147e-04
Loss = 4.6829e-03, PNorm = 184.2278, GNorm = 0.1507, lr_0 = 1.7136e-04
Loss = 2.2729e-03, PNorm = 184.2330, GNorm = 0.1569, lr_0 = 1.7124e-04
Loss = 3.7815e-03, PNorm = 184.2365, GNorm = 0.0834, lr_0 = 1.7112e-04
Loss = 3.3591e-03, PNorm = 184.2395, GNorm = 0.1033, lr_0 = 1.7100e-04
Loss = 5.8943e-03, PNorm = 184.2439, GNorm = 0.1757, lr_0 = 1.7089e-04
Loss = 5.3044e-03, PNorm = 184.2498, GNorm = 0.0793, lr_0 = 1.7077e-04
Loss = 3.5472e-03, PNorm = 184.2532, GNorm = 0.1260, lr_0 = 1.7065e-04
Loss = 3.2464e-03, PNorm = 184.2559, GNorm = 0.0798, lr_0 = 1.7054e-04
Loss = 3.3073e-03, PNorm = 184.2591, GNorm = 0.0715, lr_0 = 1.7042e-04
Loss = 2.7720e-03, PNorm = 184.2627, GNorm = 0.1845, lr_0 = 1.7030e-04
Loss = 4.4876e-03, PNorm = 184.2674, GNorm = 0.1656, lr_0 = 1.7019e-04
Loss = 3.9740e-03, PNorm = 184.2710, GNorm = 0.1538, lr_0 = 1.7007e-04
Loss = 2.6728e-03, PNorm = 184.2745, GNorm = 0.0707, lr_0 = 1.6995e-04
Loss = 2.1506e-03, PNorm = 184.2771, GNorm = 0.1764, lr_0 = 1.6984e-04
Loss = 6.3173e-03, PNorm = 184.2779, GNorm = 0.0903, lr_0 = 1.6972e-04
Loss = 4.9882e-03, PNorm = 184.2809, GNorm = 0.2601, lr_0 = 1.6960e-04
Loss = 4.3803e-03, PNorm = 184.2838, GNorm = 0.1149, lr_0 = 1.6949e-04
Loss = 2.8603e-03, PNorm = 184.2880, GNorm = 0.0924, lr_0 = 1.6937e-04
Loss = 3.5830e-03, PNorm = 184.2929, GNorm = 0.1761, lr_0 = 1.6926e-04
Loss = 1.0917e-02, PNorm = 184.2961, GNorm = 0.3441, lr_0 = 1.6914e-04
Loss = 4.0496e-03, PNorm = 184.3033, GNorm = 0.1700, lr_0 = 1.6902e-04
Loss = 5.1025e-03, PNorm = 184.3080, GNorm = 0.2446, lr_0 = 1.6891e-04
Loss = 2.6259e-03, PNorm = 184.3126, GNorm = 0.0957, lr_0 = 1.6879e-04
Loss = 3.2732e-03, PNorm = 184.3166, GNorm = 0.0878, lr_0 = 1.6868e-04
Loss = 3.1076e-03, PNorm = 184.3208, GNorm = 0.1513, lr_0 = 1.6856e-04
Loss = 4.6241e-03, PNorm = 184.3250, GNorm = 0.1384, lr_0 = 1.6845e-04
Loss = 3.0595e-03, PNorm = 184.3281, GNorm = 0.1459, lr_0 = 1.6833e-04
Loss = 5.0775e-03, PNorm = 184.3305, GNorm = 0.2860, lr_0 = 1.6821e-04
Loss = 3.9645e-03, PNorm = 184.3340, GNorm = 0.0661, lr_0 = 1.6810e-04
Loss = 3.6995e-03, PNorm = 184.3362, GNorm = 0.0644, lr_0 = 1.6798e-04
Loss = 4.4808e-03, PNorm = 184.3379, GNorm = 0.2185, lr_0 = 1.6787e-04
Loss = 4.0359e-03, PNorm = 184.3420, GNorm = 0.1393, lr_0 = 1.6775e-04
Loss = 8.9608e-03, PNorm = 184.3439, GNorm = 0.1024, lr_0 = 1.6764e-04
Loss = 7.5868e-03, PNorm = 184.3440, GNorm = 1.5582, lr_0 = 1.6752e-04
Loss = 7.4351e-03, PNorm = 184.3448, GNorm = 0.3566, lr_0 = 1.6741e-04
Loss = 4.9624e-03, PNorm = 184.3480, GNorm = 0.1024, lr_0 = 1.6729e-04
Loss = 2.9909e-03, PNorm = 184.3519, GNorm = 0.1502, lr_0 = 1.6718e-04
Loss = 2.4083e-03, PNorm = 184.3563, GNorm = 0.0715, lr_0 = 1.6707e-04
Loss = 5.2399e-03, PNorm = 184.3610, GNorm = 1.0972, lr_0 = 1.6695e-04
Loss = 8.0426e-03, PNorm = 184.3646, GNorm = 0.1210, lr_0 = 1.6684e-04
Loss = 4.6124e-03, PNorm = 184.3683, GNorm = 0.1164, lr_0 = 1.6672e-04
Loss = 3.3752e-03, PNorm = 184.3711, GNorm = 0.2842, lr_0 = 1.6661e-04
Loss = 4.5626e-03, PNorm = 184.3761, GNorm = 0.2102, lr_0 = 1.6649e-04
Loss = 2.3660e-03, PNorm = 184.3801, GNorm = 0.0860, lr_0 = 1.6638e-04
Loss = 4.3052e-03, PNorm = 184.3842, GNorm = 0.1096, lr_0 = 1.6627e-04
Loss = 5.9405e-03, PNorm = 184.3879, GNorm = 0.2460, lr_0 = 1.6615e-04
Loss = 4.7021e-03, PNorm = 184.3913, GNorm = 0.2889, lr_0 = 1.6604e-04
Loss = 3.7488e-03, PNorm = 184.3953, GNorm = 0.1777, lr_0 = 1.6592e-04
Loss = 3.8984e-03, PNorm = 184.3980, GNorm = 0.0778, lr_0 = 1.6581e-04
Loss = 2.5915e-03, PNorm = 184.4020, GNorm = 0.1685, lr_0 = 1.6570e-04
Loss = 1.1470e-02, PNorm = 184.4053, GNorm = 0.1702, lr_0 = 1.6558e-04
Loss = 5.9687e-03, PNorm = 184.4075, GNorm = 0.1205, lr_0 = 1.6547e-04
Loss = 4.6514e-03, PNorm = 184.4136, GNorm = 0.2186, lr_0 = 1.6536e-04
Loss = 5.4298e-03, PNorm = 184.4179, GNorm = 0.1424, lr_0 = 1.6524e-04
Loss = 7.2001e-03, PNorm = 184.4240, GNorm = 0.3508, lr_0 = 1.6513e-04
Loss = 2.6725e-03, PNorm = 184.4330, GNorm = 0.1318, lr_0 = 1.6502e-04
Loss = 5.0126e-03, PNorm = 184.4374, GNorm = 0.3238, lr_0 = 1.6490e-04
Loss = 6.2484e-03, PNorm = 184.4399, GNorm = 0.3586, lr_0 = 1.6479e-04
Loss = 4.2473e-03, PNorm = 184.4430, GNorm = 0.1832, lr_0 = 1.6468e-04
Loss = 2.7785e-03, PNorm = 184.4459, GNorm = 0.1015, lr_0 = 1.6457e-04
Loss = 3.2509e-03, PNorm = 184.4500, GNorm = 0.1286, lr_0 = 1.6445e-04
Loss = 2.2971e-03, PNorm = 184.4556, GNorm = 0.1193, lr_0 = 1.6434e-04
Loss = 6.7856e-03, PNorm = 184.4605, GNorm = 0.1371, lr_0 = 1.6423e-04
Loss = 2.8971e-03, PNorm = 184.4649, GNorm = 0.1115, lr_0 = 1.6412e-04
Loss = 2.5313e-03, PNorm = 184.4696, GNorm = 0.0561, lr_0 = 1.6400e-04
Loss = 4.6709e-03, PNorm = 184.4742, GNorm = 0.1218, lr_0 = 1.6389e-04
Loss = 7.5944e-03, PNorm = 184.4774, GNorm = 0.1228, lr_0 = 1.6378e-04
Validation mae = 0.121084
Epoch 24
Loss = 3.4633e-03, PNorm = 184.4785, GNorm = 0.0815, lr_0 = 1.6367e-04
Loss = 3.5318e-03, PNorm = 184.4806, GNorm = 0.2985, lr_0 = 1.6355e-04
Loss = 4.2196e-03, PNorm = 184.4827, GNorm = 0.1771, lr_0 = 1.6344e-04
Loss = 2.9945e-03, PNorm = 184.4838, GNorm = 0.0655, lr_0 = 1.6333e-04
Loss = 5.2453e-03, PNorm = 184.4857, GNorm = 0.2405, lr_0 = 1.6322e-04
Loss = 4.4145e-03, PNorm = 184.4866, GNorm = 0.1961, lr_0 = 1.6311e-04
Loss = 2.6422e-03, PNorm = 184.4887, GNorm = 0.2614, lr_0 = 1.6299e-04
Loss = 4.1039e-03, PNorm = 184.4910, GNorm = 0.0778, lr_0 = 1.6288e-04
Loss = 3.2826e-03, PNorm = 184.4947, GNorm = 0.1333, lr_0 = 1.6277e-04
Loss = 4.3381e-03, PNorm = 184.4973, GNorm = 0.1236, lr_0 = 1.6266e-04
Loss = 2.0717e-03, PNorm = 184.4996, GNorm = 0.0552, lr_0 = 1.6255e-04
Loss = 2.1494e-03, PNorm = 184.5015, GNorm = 0.1147, lr_0 = 1.6244e-04
Loss = 2.4450e-03, PNorm = 184.5037, GNorm = 0.0801, lr_0 = 1.6233e-04
Loss = 2.4465e-03, PNorm = 184.5066, GNorm = 0.0641, lr_0 = 1.6221e-04
Loss = 3.4021e-03, PNorm = 184.5095, GNorm = 0.1899, lr_0 = 1.6210e-04
Loss = 3.5618e-03, PNorm = 184.5111, GNorm = 0.1425, lr_0 = 1.6199e-04
Loss = 5.9124e-03, PNorm = 184.5119, GNorm = 0.0901, lr_0 = 1.6188e-04
Loss = 3.2433e-03, PNorm = 184.5139, GNorm = 0.0741, lr_0 = 1.6177e-04
Loss = 2.3352e-03, PNorm = 184.5165, GNorm = 0.1382, lr_0 = 1.6166e-04
Loss = 2.9670e-03, PNorm = 184.5193, GNorm = 0.1637, lr_0 = 1.6155e-04
Loss = 2.2909e-03, PNorm = 184.5217, GNorm = 0.1156, lr_0 = 1.6144e-04
Loss = 2.1175e-03, PNorm = 184.5253, GNorm = 0.1261, lr_0 = 1.6133e-04
Loss = 9.0679e-03, PNorm = 184.5291, GNorm = 0.2661, lr_0 = 1.6122e-04
Loss = 2.1742e-03, PNorm = 184.5305, GNorm = 0.0842, lr_0 = 1.6111e-04
Loss = 1.8968e-03, PNorm = 184.5322, GNorm = 0.1907, lr_0 = 1.6100e-04
Loss = 4.7488e-03, PNorm = 184.5351, GNorm = 0.1566, lr_0 = 1.6089e-04
Loss = 3.7759e-03, PNorm = 184.5377, GNorm = 0.1495, lr_0 = 1.6078e-04
Loss = 6.7003e-03, PNorm = 184.5395, GNorm = 1.4829, lr_0 = 1.6067e-04
Loss = 2.3908e-03, PNorm = 184.5411, GNorm = 0.1914, lr_0 = 1.6056e-04
Loss = 2.8414e-03, PNorm = 184.5447, GNorm = 0.2380, lr_0 = 1.6045e-04
Loss = 2.1632e-03, PNorm = 184.5472, GNorm = 0.1084, lr_0 = 1.6034e-04
Loss = 2.5285e-03, PNorm = 184.5497, GNorm = 0.0763, lr_0 = 1.6023e-04
Loss = 4.2904e-03, PNorm = 184.5530, GNorm = 0.1070, lr_0 = 1.6012e-04
Loss = 2.3921e-03, PNorm = 184.5555, GNorm = 0.1125, lr_0 = 1.6001e-04
Loss = 2.0455e-03, PNorm = 184.5581, GNorm = 0.1249, lr_0 = 1.5990e-04
Loss = 6.0112e-03, PNorm = 184.5598, GNorm = 0.3654, lr_0 = 1.5979e-04
Loss = 3.9665e-03, PNorm = 184.5624, GNorm = 0.2639, lr_0 = 1.5968e-04
Loss = 2.9065e-03, PNorm = 184.5663, GNorm = 0.1318, lr_0 = 1.5957e-04
Loss = 2.6408e-03, PNorm = 184.5694, GNorm = 0.0644, lr_0 = 1.5946e-04
Loss = 4.2022e-03, PNorm = 184.5729, GNorm = 0.1303, lr_0 = 1.5935e-04
Loss = 2.9586e-03, PNorm = 184.5762, GNorm = 0.1636, lr_0 = 1.5924e-04
Loss = 4.1252e-03, PNorm = 184.5792, GNorm = 0.0685, lr_0 = 1.5913e-04
Loss = 5.3872e-03, PNorm = 184.5829, GNorm = 0.1362, lr_0 = 1.5902e-04
Loss = 4.9647e-03, PNorm = 184.5847, GNorm = 0.1339, lr_0 = 1.5891e-04
Loss = 3.1106e-03, PNorm = 184.5872, GNorm = 0.0902, lr_0 = 1.5880e-04
Loss = 2.9136e-03, PNorm = 184.5881, GNorm = 0.0871, lr_0 = 1.5870e-04
Loss = 4.3770e-03, PNorm = 184.5892, GNorm = 0.0795, lr_0 = 1.5859e-04
Loss = 4.3701e-03, PNorm = 184.5923, GNorm = 0.1445, lr_0 = 1.5848e-04
Loss = 6.2485e-03, PNorm = 184.5946, GNorm = 0.1026, lr_0 = 1.5837e-04
Loss = 3.7759e-03, PNorm = 184.5965, GNorm = 0.2848, lr_0 = 1.5826e-04
Loss = 2.5086e-03, PNorm = 184.5983, GNorm = 0.0675, lr_0 = 1.5815e-04
Loss = 3.5620e-03, PNorm = 184.6000, GNorm = 0.1983, lr_0 = 1.5804e-04
Loss = 3.1014e-03, PNorm = 184.6026, GNorm = 0.3478, lr_0 = 1.5794e-04
Loss = 3.9148e-03, PNorm = 184.6051, GNorm = 0.1443, lr_0 = 1.5783e-04
Loss = 3.2905e-03, PNorm = 184.6076, GNorm = 0.3203, lr_0 = 1.5772e-04
Loss = 3.1913e-03, PNorm = 184.6112, GNorm = 0.0733, lr_0 = 1.5761e-04
Loss = 2.1012e-03, PNorm = 184.6133, GNorm = 0.1125, lr_0 = 1.5750e-04
Loss = 2.5969e-03, PNorm = 184.6134, GNorm = 0.2714, lr_0 = 1.5740e-04
Loss = 2.3164e-03, PNorm = 184.6154, GNorm = 0.1580, lr_0 = 1.5729e-04
Loss = 3.0392e-03, PNorm = 184.6166, GNorm = 0.1258, lr_0 = 1.5718e-04
Loss = 1.9817e-03, PNorm = 184.6192, GNorm = 0.1021, lr_0 = 1.5707e-04
Loss = 2.5361e-03, PNorm = 184.6220, GNorm = 0.1347, lr_0 = 1.5697e-04
Loss = 7.1159e-03, PNorm = 184.6268, GNorm = 0.5913, lr_0 = 1.5686e-04
Loss = 4.1834e-03, PNorm = 184.6298, GNorm = 0.0760, lr_0 = 1.5675e-04
Loss = 5.4405e-03, PNorm = 184.6338, GNorm = 0.1245, lr_0 = 1.5664e-04
Loss = 6.0556e-03, PNorm = 184.6379, GNorm = 0.2301, lr_0 = 1.5654e-04
Loss = 6.0067e-03, PNorm = 184.6392, GNorm = 0.2781, lr_0 = 1.5643e-04
Loss = 4.3867e-03, PNorm = 184.6416, GNorm = 0.1048, lr_0 = 1.5632e-04
Loss = 3.3176e-03, PNorm = 184.6435, GNorm = 0.1022, lr_0 = 1.5621e-04
Loss = 6.6740e-03, PNorm = 184.6449, GNorm = 0.2677, lr_0 = 1.5611e-04
Loss = 5.1586e-03, PNorm = 184.6472, GNorm = 0.1504, lr_0 = 1.5600e-04
Loss = 5.3294e-03, PNorm = 184.6497, GNorm = 0.2923, lr_0 = 1.5589e-04
Loss = 4.4252e-03, PNorm = 184.6524, GNorm = 0.1495, lr_0 = 1.5579e-04
Loss = 3.8238e-03, PNorm = 184.6559, GNorm = 0.0725, lr_0 = 1.5568e-04
Loss = 4.9424e-03, PNorm = 184.6587, GNorm = 0.0898, lr_0 = 1.5557e-04
Loss = 2.7532e-03, PNorm = 184.6635, GNorm = 0.2066, lr_0 = 1.5547e-04
Loss = 5.2640e-03, PNorm = 184.6650, GNorm = 0.1067, lr_0 = 1.5536e-04
Loss = 3.5005e-03, PNorm = 184.6668, GNorm = 0.1737, lr_0 = 1.5525e-04
Loss = 3.2384e-03, PNorm = 184.6698, GNorm = 0.0688, lr_0 = 1.5515e-04
Loss = 6.9454e-03, PNorm = 184.6722, GNorm = 0.2445, lr_0 = 1.5504e-04
Loss = 2.8685e-03, PNorm = 184.6745, GNorm = 0.2568, lr_0 = 1.5493e-04
Loss = 3.8517e-03, PNorm = 184.6767, GNorm = 0.1508, lr_0 = 1.5483e-04
Loss = 5.5417e-03, PNorm = 184.6778, GNorm = 0.1769, lr_0 = 1.5472e-04
Loss = 4.6444e-03, PNorm = 184.6805, GNorm = 0.0831, lr_0 = 1.5462e-04
Loss = 3.1561e-03, PNorm = 184.6825, GNorm = 0.2045, lr_0 = 1.5451e-04
Loss = 3.0874e-03, PNorm = 184.6855, GNorm = 0.1236, lr_0 = 1.5440e-04
Loss = 8.9322e-03, PNorm = 184.6879, GNorm = 0.2776, lr_0 = 1.5430e-04
Loss = 4.9124e-03, PNorm = 184.6900, GNorm = 0.0703, lr_0 = 1.5419e-04
Loss = 5.3606e-03, PNorm = 184.6929, GNorm = 0.1425, lr_0 = 1.5409e-04
Loss = 6.4165e-03, PNorm = 184.6936, GNorm = 0.1884, lr_0 = 1.5398e-04
Loss = 8.1407e-03, PNorm = 184.6994, GNorm = 0.6076, lr_0 = 1.5388e-04
Loss = 8.6943e-03, PNorm = 184.7022, GNorm = 0.1600, lr_0 = 1.5377e-04
Loss = 3.5294e-03, PNorm = 184.7053, GNorm = 0.1594, lr_0 = 1.5367e-04
Loss = 2.8075e-03, PNorm = 184.7088, GNorm = 0.2031, lr_0 = 1.5356e-04
Loss = 1.0016e-02, PNorm = 184.7103, GNorm = 0.1106, lr_0 = 1.5346e-04
Loss = 2.1827e-03, PNorm = 184.7140, GNorm = 0.0815, lr_0 = 1.5335e-04
Loss = 2.4347e-03, PNorm = 184.7162, GNorm = 0.1613, lr_0 = 1.5325e-04
Loss = 3.6275e-03, PNorm = 184.7199, GNorm = 0.0670, lr_0 = 1.5314e-04
Loss = 2.2168e-03, PNorm = 184.7234, GNorm = 0.0828, lr_0 = 1.5304e-04
Loss = 4.9263e-03, PNorm = 184.7258, GNorm = 0.1417, lr_0 = 1.5293e-04
Loss = 2.7771e-03, PNorm = 184.7277, GNorm = 0.0954, lr_0 = 1.5283e-04
Loss = 3.5007e-03, PNorm = 184.7310, GNorm = 0.1932, lr_0 = 1.5272e-04
Loss = 3.6145e-03, PNorm = 184.7352, GNorm = 0.2567, lr_0 = 1.5262e-04
Loss = 3.2227e-03, PNorm = 184.7383, GNorm = 0.1165, lr_0 = 1.5251e-04
Loss = 4.6665e-03, PNorm = 184.7405, GNorm = 0.0899, lr_0 = 1.5241e-04
Loss = 2.3409e-03, PNorm = 184.7441, GNorm = 0.0847, lr_0 = 1.5230e-04
Loss = 4.2170e-03, PNorm = 184.7459, GNorm = 0.0997, lr_0 = 1.5220e-04
Loss = 8.3512e-03, PNorm = 184.7491, GNorm = 0.1475, lr_0 = 1.5209e-04
Loss = 6.0994e-03, PNorm = 184.7507, GNorm = 0.1145, lr_0 = 1.5199e-04
Loss = 5.9063e-03, PNorm = 184.7535, GNorm = 0.2010, lr_0 = 1.5189e-04
Loss = 5.6036e-03, PNorm = 184.7565, GNorm = 0.2937, lr_0 = 1.5178e-04
Loss = 4.7885e-03, PNorm = 184.7600, GNorm = 0.1585, lr_0 = 1.5168e-04
Loss = 2.7102e-03, PNorm = 184.7623, GNorm = 0.0941, lr_0 = 1.5157e-04
Loss = 2.7420e-03, PNorm = 184.7653, GNorm = 0.1938, lr_0 = 1.5147e-04
Loss = 6.9359e-03, PNorm = 184.7651, GNorm = 0.5715, lr_0 = 1.5137e-04
Loss = 6.6622e-03, PNorm = 184.7682, GNorm = 0.1632, lr_0 = 1.5126e-04
Loss = 2.1785e-02, PNorm = 184.7731, GNorm = 0.5533, lr_0 = 1.5116e-04
Loss = 7.7970e-03, PNorm = 184.7756, GNorm = 0.0875, lr_0 = 1.5106e-04
Loss = 3.0129e-03, PNorm = 184.7789, GNorm = 0.1261, lr_0 = 1.5095e-04
Loss = 3.3997e-03, PNorm = 184.7822, GNorm = 0.1102, lr_0 = 1.5085e-04
Validation mae = 0.121011
Epoch 25
Loss = 1.0542e-02, PNorm = 184.7825, GNorm = 0.1673, lr_0 = 1.5075e-04
Loss = 4.4390e-03, PNorm = 184.7835, GNorm = 0.4946, lr_0 = 1.5064e-04
Loss = 2.3878e-03, PNorm = 184.7879, GNorm = 0.2093, lr_0 = 1.5054e-04
Loss = 2.1195e-03, PNorm = 184.7908, GNorm = 0.2111, lr_0 = 1.5044e-04
Loss = 2.3135e-03, PNorm = 184.7929, GNorm = 0.1017, lr_0 = 1.5033e-04
Loss = 7.8251e-03, PNorm = 184.7961, GNorm = 0.1470, lr_0 = 1.5023e-04
Loss = 1.7963e-03, PNorm = 184.7988, GNorm = 0.1130, lr_0 = 1.5013e-04
Loss = 2.3693e-03, PNorm = 184.8032, GNorm = 0.1825, lr_0 = 1.5002e-04
Loss = 2.7960e-03, PNorm = 184.8062, GNorm = 0.1728, lr_0 = 1.4992e-04
Loss = 9.8839e-03, PNorm = 184.8090, GNorm = 0.0908, lr_0 = 1.4982e-04
Loss = 2.7488e-03, PNorm = 184.8124, GNorm = 0.1826, lr_0 = 1.4972e-04
Loss = 5.5957e-03, PNorm = 184.8151, GNorm = 0.1088, lr_0 = 1.4961e-04
Loss = 3.8822e-03, PNorm = 184.8182, GNorm = 0.1296, lr_0 = 1.4951e-04
Loss = 5.4855e-03, PNorm = 184.8195, GNorm = 0.2241, lr_0 = 1.4941e-04
Loss = 5.5901e-03, PNorm = 184.8206, GNorm = 0.1337, lr_0 = 1.4931e-04
Loss = 2.4154e-03, PNorm = 184.8222, GNorm = 0.0970, lr_0 = 1.4920e-04
Loss = 2.3426e-03, PNorm = 184.8239, GNorm = 0.1107, lr_0 = 1.4910e-04
Loss = 5.3229e-03, PNorm = 184.8261, GNorm = 0.0586, lr_0 = 1.4900e-04
Loss = 4.3936e-03, PNorm = 184.8290, GNorm = 0.0978, lr_0 = 1.4890e-04
Loss = 1.6915e-03, PNorm = 184.8316, GNorm = 0.1194, lr_0 = 1.4880e-04
Loss = 4.3414e-03, PNorm = 184.8321, GNorm = 0.1060, lr_0 = 1.4869e-04
Loss = 3.6899e-03, PNorm = 184.8360, GNorm = 0.1180, lr_0 = 1.4859e-04
Loss = 5.2658e-03, PNorm = 184.8371, GNorm = 0.2183, lr_0 = 1.4849e-04
Loss = 3.8212e-03, PNorm = 184.8386, GNorm = 0.0697, lr_0 = 1.4839e-04
Loss = 5.0551e-03, PNorm = 184.8403, GNorm = 0.0937, lr_0 = 1.4829e-04
Loss = 1.9007e-03, PNorm = 184.8415, GNorm = 0.0567, lr_0 = 1.4818e-04
Loss = 2.1593e-03, PNorm = 184.8431, GNorm = 0.0894, lr_0 = 1.4808e-04
Loss = 6.5128e-03, PNorm = 184.8452, GNorm = 0.1220, lr_0 = 1.4798e-04
Loss = 8.4179e-03, PNorm = 184.8453, GNorm = 0.1867, lr_0 = 1.4788e-04
Loss = 3.1094e-03, PNorm = 184.8480, GNorm = 0.1356, lr_0 = 1.4778e-04
Loss = 2.3612e-03, PNorm = 184.8521, GNorm = 0.1113, lr_0 = 1.4768e-04
Loss = 1.9768e-03, PNorm = 184.8552, GNorm = 0.1568, lr_0 = 1.4758e-04
Loss = 6.3849e-03, PNorm = 184.8589, GNorm = 0.5047, lr_0 = 1.4748e-04
Loss = 2.8791e-03, PNorm = 184.8625, GNorm = 0.1150, lr_0 = 1.4737e-04
Loss = 2.0926e-03, PNorm = 184.8663, GNorm = 0.0929, lr_0 = 1.4727e-04
Loss = 2.3574e-03, PNorm = 184.8710, GNorm = 0.1055, lr_0 = 1.4717e-04
Loss = 2.8260e-03, PNorm = 184.8742, GNorm = 0.1159, lr_0 = 1.4707e-04
Loss = 2.2065e-03, PNorm = 184.8768, GNorm = 0.1026, lr_0 = 1.4697e-04
Loss = 1.9106e-03, PNorm = 184.8804, GNorm = 0.0986, lr_0 = 1.4687e-04
Loss = 3.3227e-03, PNorm = 184.8830, GNorm = 0.0576, lr_0 = 1.4677e-04
Loss = 2.6945e-03, PNorm = 184.8855, GNorm = 0.1541, lr_0 = 1.4667e-04
Loss = 2.1492e-03, PNorm = 184.8874, GNorm = 0.0988, lr_0 = 1.4657e-04
Loss = 1.7569e-03, PNorm = 184.8896, GNorm = 0.1167, lr_0 = 1.4647e-04
Loss = 3.1906e-03, PNorm = 184.8919, GNorm = 0.2441, lr_0 = 1.4637e-04
Loss = 2.7341e-03, PNorm = 184.8930, GNorm = 0.0865, lr_0 = 1.4627e-04
Loss = 3.1388e-03, PNorm = 184.8946, GNorm = 0.1040, lr_0 = 1.4617e-04
Loss = 3.2283e-03, PNorm = 184.8958, GNorm = 0.1249, lr_0 = 1.4607e-04
Loss = 2.0170e-03, PNorm = 184.8983, GNorm = 0.1682, lr_0 = 1.4597e-04
Loss = 8.3924e-03, PNorm = 184.9010, GNorm = 0.1999, lr_0 = 1.4587e-04
Loss = 9.6435e-03, PNorm = 184.9030, GNorm = 0.2681, lr_0 = 1.4577e-04
Loss = 1.9272e-03, PNorm = 184.9063, GNorm = 0.2917, lr_0 = 1.4567e-04
Loss = 2.0841e-03, PNorm = 184.9091, GNorm = 0.1228, lr_0 = 1.4557e-04
Loss = 3.9107e-03, PNorm = 184.9131, GNorm = 0.2003, lr_0 = 1.4547e-04
Loss = 3.0021e-03, PNorm = 184.9159, GNorm = 0.1369, lr_0 = 1.4537e-04
Loss = 4.6285e-03, PNorm = 184.9172, GNorm = 0.0862, lr_0 = 1.4527e-04
Loss = 2.3388e-03, PNorm = 184.9193, GNorm = 0.0595, lr_0 = 1.4517e-04
Loss = 3.6077e-03, PNorm = 184.9219, GNorm = 0.1147, lr_0 = 1.4507e-04
Loss = 5.4289e-03, PNorm = 184.9248, GNorm = 0.1943, lr_0 = 1.4497e-04
Loss = 2.4498e-03, PNorm = 184.9269, GNorm = 0.0786, lr_0 = 1.4487e-04
Loss = 6.3441e-03, PNorm = 184.9284, GNorm = 0.0843, lr_0 = 1.4477e-04
Loss = 3.2355e-03, PNorm = 184.9308, GNorm = 0.2167, lr_0 = 1.4467e-04
Loss = 5.0781e-03, PNorm = 184.9334, GNorm = 0.0872, lr_0 = 1.4457e-04
Loss = 3.5653e-03, PNorm = 184.9359, GNorm = 0.0907, lr_0 = 1.4447e-04
Loss = 2.2071e-03, PNorm = 184.9388, GNorm = 0.1040, lr_0 = 1.4438e-04
Loss = 5.8255e-03, PNorm = 184.9421, GNorm = 0.4990, lr_0 = 1.4428e-04
Loss = 1.9754e-03, PNorm = 184.9440, GNorm = 0.1741, lr_0 = 1.4418e-04
Loss = 4.5461e-03, PNorm = 184.9453, GNorm = 0.1338, lr_0 = 1.4408e-04
Loss = 2.9584e-03, PNorm = 184.9484, GNorm = 0.0585, lr_0 = 1.4398e-04
Loss = 1.8981e-03, PNorm = 184.9519, GNorm = 0.0665, lr_0 = 1.4388e-04
Loss = 6.7842e-03, PNorm = 184.9554, GNorm = 0.1115, lr_0 = 1.4378e-04
Loss = 3.6661e-03, PNorm = 184.9580, GNorm = 0.3406, lr_0 = 1.4368e-04
Loss = 4.3241e-03, PNorm = 184.9607, GNorm = 0.0520, lr_0 = 1.4359e-04
Loss = 5.0838e-03, PNorm = 184.9631, GNorm = 0.1281, lr_0 = 1.4349e-04
Loss = 3.4507e-03, PNorm = 184.9660, GNorm = 0.1190, lr_0 = 1.4339e-04
Loss = 3.5900e-03, PNorm = 184.9699, GNorm = 0.1468, lr_0 = 1.4329e-04
Loss = 3.7638e-03, PNorm = 184.9723, GNorm = 0.0790, lr_0 = 1.4319e-04
Loss = 6.2815e-03, PNorm = 184.9757, GNorm = 0.0590, lr_0 = 1.4310e-04
Loss = 7.3401e-03, PNorm = 184.9793, GNorm = 1.1336, lr_0 = 1.4300e-04
Loss = 4.2394e-03, PNorm = 184.9828, GNorm = 0.0709, lr_0 = 1.4290e-04
Loss = 2.1249e-03, PNorm = 184.9841, GNorm = 0.1298, lr_0 = 1.4280e-04
Loss = 2.6535e-03, PNorm = 184.9862, GNorm = 0.2126, lr_0 = 1.4270e-04
Loss = 2.4300e-03, PNorm = 184.9892, GNorm = 0.0638, lr_0 = 1.4261e-04
Loss = 6.4723e-03, PNorm = 184.9908, GNorm = 0.2112, lr_0 = 1.4251e-04
Loss = 1.7023e-03, PNorm = 184.9928, GNorm = 0.1061, lr_0 = 1.4241e-04
Loss = 3.3606e-03, PNorm = 184.9941, GNorm = 0.1001, lr_0 = 1.4231e-04
Loss = 4.7610e-03, PNorm = 184.9949, GNorm = 0.0579, lr_0 = 1.4222e-04
Loss = 3.7972e-03, PNorm = 184.9976, GNorm = 0.1074, lr_0 = 1.4212e-04
Loss = 3.3976e-03, PNorm = 184.9987, GNorm = 0.2558, lr_0 = 1.4202e-04
Loss = 3.5250e-03, PNorm = 184.9995, GNorm = 0.2286, lr_0 = 1.4192e-04
Loss = 6.0404e-03, PNorm = 185.0019, GNorm = 0.1452, lr_0 = 1.4183e-04
Loss = 5.5576e-03, PNorm = 185.0043, GNorm = 0.0769, lr_0 = 1.4173e-04
Loss = 1.6653e-03, PNorm = 185.0066, GNorm = 0.1158, lr_0 = 1.4163e-04
Loss = 6.0622e-03, PNorm = 185.0101, GNorm = 0.1154, lr_0 = 1.4153e-04
Loss = 2.2335e-03, PNorm = 185.0110, GNorm = 0.0684, lr_0 = 1.4144e-04
Loss = 4.8421e-03, PNorm = 185.0124, GNorm = 0.1035, lr_0 = 1.4134e-04
Loss = 1.6426e-03, PNorm = 185.0142, GNorm = 0.1977, lr_0 = 1.4124e-04
Loss = 3.3528e-03, PNorm = 185.0175, GNorm = 0.1853, lr_0 = 1.4115e-04
Loss = 4.6963e-03, PNorm = 185.0203, GNorm = 0.0907, lr_0 = 1.4105e-04
Loss = 6.9869e-03, PNorm = 185.0223, GNorm = 1.2471, lr_0 = 1.4095e-04
Loss = 1.9231e-03, PNorm = 185.0241, GNorm = 0.0442, lr_0 = 1.4086e-04
Loss = 4.4613e-03, PNorm = 185.0274, GNorm = 0.2263, lr_0 = 1.4076e-04
Loss = 4.8033e-03, PNorm = 185.0297, GNorm = 0.0686, lr_0 = 1.4066e-04
Loss = 2.1953e-03, PNorm = 185.0322, GNorm = 0.0833, lr_0 = 1.4057e-04
Loss = 1.7031e-03, PNorm = 185.0340, GNorm = 0.2264, lr_0 = 1.4047e-04
Loss = 2.8729e-03, PNorm = 185.0357, GNorm = 0.1272, lr_0 = 1.4038e-04
Loss = 3.2506e-03, PNorm = 185.0378, GNorm = 0.3198, lr_0 = 1.4028e-04
Loss = 2.2102e-03, PNorm = 185.0392, GNorm = 0.1217, lr_0 = 1.4018e-04
Loss = 6.0472e-03, PNorm = 185.0398, GNorm = 0.2156, lr_0 = 1.4009e-04
Loss = 4.1921e-03, PNorm = 185.0409, GNorm = 0.5579, lr_0 = 1.3999e-04
Loss = 2.0232e-03, PNorm = 185.0439, GNorm = 0.1224, lr_0 = 1.3990e-04
Loss = 1.7691e-03, PNorm = 185.0473, GNorm = 0.1311, lr_0 = 1.3980e-04
Loss = 2.5497e-03, PNorm = 185.0498, GNorm = 0.1760, lr_0 = 1.3970e-04
Loss = 2.4387e-03, PNorm = 185.0516, GNorm = 0.1146, lr_0 = 1.3961e-04
Loss = 3.3853e-03, PNorm = 185.0536, GNorm = 0.0649, lr_0 = 1.3951e-04
Loss = 8.6616e-03, PNorm = 185.0543, GNorm = 0.1907, lr_0 = 1.3942e-04
Loss = 5.2210e-03, PNorm = 185.0552, GNorm = 0.1160, lr_0 = 1.3932e-04
Loss = 3.2426e-03, PNorm = 185.0588, GNorm = 0.3536, lr_0 = 1.3923e-04
Loss = 3.5606e-03, PNorm = 185.0612, GNorm = 0.0974, lr_0 = 1.3913e-04
Loss = 6.0371e-03, PNorm = 185.0639, GNorm = 0.0670, lr_0 = 1.3904e-04
Loss = 2.3955e-03, PNorm = 185.0669, GNorm = 0.0757, lr_0 = 1.3894e-04
Validation mae = 0.121007
Epoch 26
Loss = 2.1426e-03, PNorm = 185.0676, GNorm = 0.0764, lr_0 = 1.3884e-04
Loss = 4.6660e-03, PNorm = 185.0678, GNorm = 0.1633, lr_0 = 1.3875e-04
Loss = 3.4728e-03, PNorm = 185.0685, GNorm = 0.1606, lr_0 = 1.3865e-04
Loss = 1.7557e-03, PNorm = 185.0691, GNorm = 0.1271, lr_0 = 1.3856e-04
Loss = 3.4219e-03, PNorm = 185.0699, GNorm = 0.1323, lr_0 = 1.3846e-04
Loss = 1.9898e-03, PNorm = 185.0715, GNorm = 0.1506, lr_0 = 1.3837e-04
Loss = 3.8943e-03, PNorm = 185.0748, GNorm = 0.0758, lr_0 = 1.3828e-04
Loss = 6.4946e-03, PNorm = 185.0768, GNorm = 0.0873, lr_0 = 1.3818e-04
Loss = 3.1482e-03, PNorm = 185.0797, GNorm = 0.1089, lr_0 = 1.3809e-04
Loss = 1.9899e-03, PNorm = 185.0816, GNorm = 0.1389, lr_0 = 1.3799e-04
Loss = 4.0309e-03, PNorm = 185.0824, GNorm = 0.1519, lr_0 = 1.3790e-04
Loss = 1.6724e-03, PNorm = 185.0841, GNorm = 0.0503, lr_0 = 1.3780e-04
Loss = 1.7107e-03, PNorm = 185.0861, GNorm = 0.1100, lr_0 = 1.3771e-04
Loss = 5.4549e-03, PNorm = 185.0866, GNorm = 0.0538, lr_0 = 1.3761e-04
Loss = 2.7657e-03, PNorm = 185.0887, GNorm = 0.1947, lr_0 = 1.3752e-04
Loss = 1.6411e-03, PNorm = 185.0904, GNorm = 0.0939, lr_0 = 1.3742e-04
Loss = 1.8333e-03, PNorm = 185.0918, GNorm = 0.1806, lr_0 = 1.3733e-04
Loss = 2.2971e-03, PNorm = 185.0928, GNorm = 0.1450, lr_0 = 1.3724e-04
Loss = 2.9861e-03, PNorm = 185.0943, GNorm = 0.0951, lr_0 = 1.3714e-04
Loss = 6.6122e-03, PNorm = 185.0971, GNorm = 0.0800, lr_0 = 1.3705e-04
Loss = 2.2195e-03, PNorm = 185.0970, GNorm = 0.0538, lr_0 = 1.3695e-04
Loss = 3.4333e-03, PNorm = 185.0987, GNorm = 0.1009, lr_0 = 1.3686e-04
Loss = 2.1910e-03, PNorm = 185.1005, GNorm = 0.0614, lr_0 = 1.3677e-04
Loss = 1.7864e-03, PNorm = 185.1015, GNorm = 0.2032, lr_0 = 1.3667e-04
Loss = 4.8890e-03, PNorm = 185.1047, GNorm = 0.7177, lr_0 = 1.3658e-04
Loss = 4.2195e-03, PNorm = 185.1064, GNorm = 0.1272, lr_0 = 1.3649e-04
Loss = 4.7416e-03, PNorm = 185.1059, GNorm = 1.2591, lr_0 = 1.3639e-04
Loss = 4.1718e-03, PNorm = 185.1067, GNorm = 0.0963, lr_0 = 1.3630e-04
Loss = 2.6420e-03, PNorm = 185.1111, GNorm = 0.1109, lr_0 = 1.3621e-04
Loss = 1.7589e-03, PNorm = 185.1147, GNorm = 0.2008, lr_0 = 1.3611e-04
Loss = 3.0802e-03, PNorm = 185.1172, GNorm = 0.1099, lr_0 = 1.3602e-04
Loss = 2.4212e-03, PNorm = 185.1199, GNorm = 0.2529, lr_0 = 1.3593e-04
Loss = 8.3864e-03, PNorm = 185.1226, GNorm = 0.1080, lr_0 = 1.3583e-04
Loss = 1.4149e-02, PNorm = 185.1259, GNorm = 0.1621, lr_0 = 1.3574e-04
Loss = 2.5192e-03, PNorm = 185.1288, GNorm = 0.1571, lr_0 = 1.3565e-04
Loss = 2.5025e-03, PNorm = 185.1306, GNorm = 0.1154, lr_0 = 1.3555e-04
Loss = 3.1445e-03, PNorm = 185.1325, GNorm = 0.0537, lr_0 = 1.3546e-04
Loss = 3.3754e-03, PNorm = 185.1330, GNorm = 0.0896, lr_0 = 1.3537e-04
Loss = 3.8781e-03, PNorm = 185.1337, GNorm = 0.0746, lr_0 = 1.3528e-04
Loss = 5.1611e-03, PNorm = 185.1348, GNorm = 0.0586, lr_0 = 1.3518e-04
Loss = 5.8439e-03, PNorm = 185.1365, GNorm = 0.1083, lr_0 = 1.3509e-04
Loss = 1.7003e-03, PNorm = 185.1381, GNorm = 0.0985, lr_0 = 1.3500e-04
Loss = 2.8812e-03, PNorm = 185.1406, GNorm = 0.1082, lr_0 = 1.3491e-04
Loss = 3.4868e-03, PNorm = 185.1421, GNorm = 0.1283, lr_0 = 1.3481e-04
Loss = 1.9758e-03, PNorm = 185.1440, GNorm = 0.1385, lr_0 = 1.3472e-04
Loss = 3.2633e-03, PNorm = 185.1452, GNorm = 0.5957, lr_0 = 1.3463e-04
Loss = 2.9686e-03, PNorm = 185.1452, GNorm = 0.1167, lr_0 = 1.3454e-04
Loss = 9.9341e-03, PNorm = 185.1438, GNorm = 0.1478, lr_0 = 1.3444e-04
Loss = 3.6333e-03, PNorm = 185.1453, GNorm = 0.0618, lr_0 = 1.3435e-04
Loss = 1.8773e-03, PNorm = 185.1472, GNorm = 0.1596, lr_0 = 1.3426e-04
Loss = 5.7202e-03, PNorm = 185.1484, GNorm = 0.0959, lr_0 = 1.3417e-04
Loss = 1.3963e-03, PNorm = 185.1500, GNorm = 0.0537, lr_0 = 1.3408e-04
Loss = 2.4622e-03, PNorm = 185.1527, GNorm = 0.0686, lr_0 = 1.3398e-04
Loss = 2.5984e-03, PNorm = 185.1555, GNorm = 0.1121, lr_0 = 1.3389e-04
Loss = 1.8337e-03, PNorm = 185.1570, GNorm = 0.0743, lr_0 = 1.3380e-04
Loss = 2.3784e-03, PNorm = 185.1584, GNorm = 0.0730, lr_0 = 1.3371e-04
Loss = 3.9967e-03, PNorm = 185.1605, GNorm = 0.1017, lr_0 = 1.3362e-04
Loss = 2.4555e-03, PNorm = 185.1629, GNorm = 0.0876, lr_0 = 1.3353e-04
Loss = 3.3334e-03, PNorm = 185.1646, GNorm = 0.1542, lr_0 = 1.3343e-04
Loss = 3.1403e-03, PNorm = 185.1661, GNorm = 0.1478, lr_0 = 1.3334e-04
Loss = 4.8453e-03, PNorm = 185.1676, GNorm = 0.1052, lr_0 = 1.3325e-04
Loss = 2.3671e-03, PNorm = 185.1704, GNorm = 0.0539, lr_0 = 1.3316e-04
Loss = 3.0174e-03, PNorm = 185.1733, GNorm = 0.0830, lr_0 = 1.3307e-04
Loss = 3.8935e-03, PNorm = 185.1760, GNorm = 0.1413, lr_0 = 1.3298e-04
Loss = 8.5813e-03, PNorm = 185.1787, GNorm = 0.1969, lr_0 = 1.3289e-04
Loss = 1.7801e-03, PNorm = 185.1808, GNorm = 0.0819, lr_0 = 1.3280e-04
Loss = 1.9487e-03, PNorm = 185.1830, GNorm = 0.0844, lr_0 = 1.3270e-04
Loss = 3.4453e-03, PNorm = 185.1848, GNorm = 0.1035, lr_0 = 1.3261e-04
Loss = 3.3275e-03, PNorm = 185.1870, GNorm = 0.2149, lr_0 = 1.3252e-04
Loss = 3.0398e-03, PNorm = 185.1888, GNorm = 0.1632, lr_0 = 1.3243e-04
Loss = 3.3183e-03, PNorm = 185.1911, GNorm = 0.2257, lr_0 = 1.3234e-04
Loss = 4.7212e-03, PNorm = 185.1935, GNorm = 0.1826, lr_0 = 1.3225e-04
Loss = 1.9106e-03, PNorm = 185.1955, GNorm = 0.0655, lr_0 = 1.3216e-04
Loss = 3.0053e-03, PNorm = 185.1980, GNorm = 0.0925, lr_0 = 1.3207e-04
Loss = 2.1189e-03, PNorm = 185.2009, GNorm = 0.0807, lr_0 = 1.3198e-04
Loss = 2.8679e-03, PNorm = 185.2027, GNorm = 0.0725, lr_0 = 1.3189e-04
Loss = 2.6738e-03, PNorm = 185.2042, GNorm = 0.2876, lr_0 = 1.3180e-04
Loss = 3.9615e-03, PNorm = 185.2059, GNorm = 0.0762, lr_0 = 1.3171e-04
Loss = 7.0257e-03, PNorm = 185.2089, GNorm = 0.1788, lr_0 = 1.3162e-04
Loss = 3.1072e-03, PNorm = 185.2135, GNorm = 0.1193, lr_0 = 1.3153e-04
Loss = 1.3660e-03, PNorm = 185.2150, GNorm = 0.1118, lr_0 = 1.3144e-04
Loss = 2.4337e-03, PNorm = 185.2160, GNorm = 0.0380, lr_0 = 1.3135e-04
Loss = 6.3207e-03, PNorm = 185.2156, GNorm = 0.1365, lr_0 = 1.3126e-04
Loss = 1.9062e-03, PNorm = 185.2172, GNorm = 0.1860, lr_0 = 1.3117e-04
Loss = 2.8304e-03, PNorm = 185.2197, GNorm = 0.1162, lr_0 = 1.3108e-04
Loss = 3.1247e-03, PNorm = 185.2212, GNorm = 0.0572, lr_0 = 1.3099e-04
Loss = 4.5560e-03, PNorm = 185.2219, GNorm = 0.2925, lr_0 = 1.3090e-04
Loss = 6.5536e-03, PNorm = 185.2244, GNorm = 0.0833, lr_0 = 1.3081e-04
Loss = 2.3085e-03, PNorm = 185.2261, GNorm = 0.1487, lr_0 = 1.3072e-04
Loss = 5.9233e-03, PNorm = 185.2269, GNorm = 0.0991, lr_0 = 1.3063e-04
Loss = 2.3687e-03, PNorm = 185.2278, GNorm = 0.1295, lr_0 = 1.3054e-04
Loss = 7.8549e-03, PNorm = 185.2292, GNorm = 0.1182, lr_0 = 1.3045e-04
Loss = 6.2040e-03, PNorm = 185.2300, GNorm = 0.1176, lr_0 = 1.3036e-04
Loss = 5.9989e-03, PNorm = 185.2321, GNorm = 1.1593, lr_0 = 1.3027e-04
Loss = 3.6152e-03, PNorm = 185.2335, GNorm = 0.3022, lr_0 = 1.3018e-04
Loss = 4.7822e-03, PNorm = 185.2334, GNorm = 0.1313, lr_0 = 1.3009e-04
Loss = 3.0643e-03, PNorm = 185.2354, GNorm = 0.1167, lr_0 = 1.3000e-04
Loss = 6.4452e-03, PNorm = 185.2378, GNorm = 0.4522, lr_0 = 1.2992e-04
Loss = 1.5658e-03, PNorm = 185.2405, GNorm = 0.0448, lr_0 = 1.2983e-04
Loss = 2.8129e-03, PNorm = 185.2425, GNorm = 0.0832, lr_0 = 1.2974e-04
Loss = 4.4422e-03, PNorm = 185.2452, GNorm = 0.0893, lr_0 = 1.2965e-04
Loss = 4.0131e-03, PNorm = 185.2467, GNorm = 0.1093, lr_0 = 1.2956e-04
Loss = 1.9889e-03, PNorm = 185.2473, GNorm = 0.1891, lr_0 = 1.2947e-04
Loss = 2.9190e-03, PNorm = 185.2492, GNorm = 0.0894, lr_0 = 1.2938e-04
Loss = 1.5743e-03, PNorm = 185.2511, GNorm = 0.1167, lr_0 = 1.2929e-04
Loss = 1.6208e-03, PNorm = 185.2526, GNorm = 0.0861, lr_0 = 1.2921e-04
Loss = 1.5809e-03, PNorm = 185.2546, GNorm = 0.0643, lr_0 = 1.2912e-04
Loss = 1.2383e-03, PNorm = 185.2565, GNorm = 0.0601, lr_0 = 1.2903e-04
Loss = 2.7126e-03, PNorm = 185.2582, GNorm = 0.3925, lr_0 = 1.2894e-04
Loss = 5.8036e-03, PNorm = 185.2601, GNorm = 0.1249, lr_0 = 1.2885e-04
Loss = 3.0412e-03, PNorm = 185.2616, GNorm = 0.0578, lr_0 = 1.2876e-04
Loss = 1.6221e-03, PNorm = 185.2637, GNorm = 0.1388, lr_0 = 1.2867e-04
Loss = 2.3109e-03, PNorm = 185.2656, GNorm = 0.2598, lr_0 = 1.2859e-04
Loss = 3.4711e-03, PNorm = 185.2668, GNorm = 0.1435, lr_0 = 1.2850e-04
Loss = 3.0381e-03, PNorm = 185.2688, GNorm = 0.1480, lr_0 = 1.2841e-04
Loss = 1.5985e-03, PNorm = 185.2707, GNorm = 0.1136, lr_0 = 1.2832e-04
Loss = 6.8226e-03, PNorm = 185.2732, GNorm = 0.1159, lr_0 = 1.2823e-04
Loss = 2.9706e-03, PNorm = 185.2745, GNorm = 0.0686, lr_0 = 1.2815e-04
Loss = 3.4615e-03, PNorm = 185.2759, GNorm = 0.2016, lr_0 = 1.2806e-04
Loss = 2.4011e-03, PNorm = 185.2792, GNorm = 0.1516, lr_0 = 1.2797e-04
Validation mae = 0.120762
Epoch 27
Loss = 2.2714e-03, PNorm = 185.2813, GNorm = 0.1480, lr_0 = 1.2788e-04
Loss = 1.4812e-03, PNorm = 185.2832, GNorm = 0.1521, lr_0 = 1.2780e-04
Loss = 2.2863e-03, PNorm = 185.2852, GNorm = 0.0606, lr_0 = 1.2771e-04
Loss = 1.1636e-03, PNorm = 185.2877, GNorm = 0.2107, lr_0 = 1.2762e-04
Loss = 1.5217e-02, PNorm = 185.2902, GNorm = 0.2463, lr_0 = 1.2753e-04
Loss = 3.2052e-03, PNorm = 185.2915, GNorm = 0.1523, lr_0 = 1.2745e-04
Loss = 1.3200e-03, PNorm = 185.2934, GNorm = 0.2172, lr_0 = 1.2736e-04
Loss = 2.8290e-03, PNorm = 185.2950, GNorm = 0.1577, lr_0 = 1.2727e-04
Loss = 1.5097e-03, PNorm = 185.2963, GNorm = 0.0621, lr_0 = 1.2718e-04
Loss = 4.5406e-03, PNorm = 185.2977, GNorm = 0.0982, lr_0 = 1.2710e-04
Loss = 3.4196e-03, PNorm = 185.2984, GNorm = 0.5723, lr_0 = 1.2701e-04
Loss = 3.1897e-03, PNorm = 185.2997, GNorm = 0.0845, lr_0 = 1.2692e-04
Loss = 1.3393e-03, PNorm = 185.3013, GNorm = 0.0449, lr_0 = 1.2684e-04
Loss = 3.3850e-03, PNorm = 185.3019, GNorm = 0.0513, lr_0 = 1.2675e-04
Loss = 2.9024e-03, PNorm = 185.3038, GNorm = 0.1471, lr_0 = 1.2666e-04
Loss = 3.8201e-03, PNorm = 185.3065, GNorm = 0.0811, lr_0 = 1.2658e-04
Loss = 3.2775e-03, PNorm = 185.3068, GNorm = 0.1737, lr_0 = 1.2649e-04
Loss = 1.4946e-03, PNorm = 185.3087, GNorm = 0.0827, lr_0 = 1.2640e-04
Loss = 1.8489e-03, PNorm = 185.3113, GNorm = 0.0761, lr_0 = 1.2632e-04
Loss = 1.4806e-03, PNorm = 185.3127, GNorm = 0.2160, lr_0 = 1.2623e-04
Loss = 1.2198e-03, PNorm = 185.3151, GNorm = 0.1078, lr_0 = 1.2614e-04
Loss = 4.9295e-03, PNorm = 185.3179, GNorm = 0.1252, lr_0 = 1.2606e-04
Loss = 4.2093e-03, PNorm = 185.3194, GNorm = 0.0674, lr_0 = 1.2597e-04
Loss = 4.6265e-03, PNorm = 185.3215, GNorm = 0.0618, lr_0 = 1.2588e-04
Loss = 3.5701e-03, PNorm = 185.3225, GNorm = 0.1393, lr_0 = 1.2580e-04
Loss = 1.3145e-03, PNorm = 185.3233, GNorm = 0.0971, lr_0 = 1.2571e-04
Loss = 1.4994e-03, PNorm = 185.3248, GNorm = 0.1259, lr_0 = 1.2563e-04
Loss = 5.3553e-03, PNorm = 185.3261, GNorm = 0.2607, lr_0 = 1.2554e-04
Loss = 7.0146e-03, PNorm = 185.3264, GNorm = 0.1149, lr_0 = 1.2545e-04
Loss = 3.1569e-03, PNorm = 185.3268, GNorm = 0.1955, lr_0 = 1.2537e-04
Loss = 2.3219e-03, PNorm = 185.3272, GNorm = 0.1153, lr_0 = 1.2528e-04
Loss = 2.0417e-03, PNorm = 185.3285, GNorm = 0.1013, lr_0 = 1.2520e-04
Loss = 1.5066e-03, PNorm = 185.3305, GNorm = 0.0870, lr_0 = 1.2511e-04
Loss = 1.1994e-03, PNorm = 185.3322, GNorm = 0.0910, lr_0 = 1.2502e-04
Loss = 2.8467e-03, PNorm = 185.3339, GNorm = 0.0706, lr_0 = 1.2494e-04
Loss = 1.0178e-03, PNorm = 185.3350, GNorm = 0.0301, lr_0 = 1.2485e-04
Loss = 1.2317e-03, PNorm = 185.3358, GNorm = 0.0757, lr_0 = 1.2477e-04
Loss = 5.2860e-03, PNorm = 185.3370, GNorm = 0.0789, lr_0 = 1.2468e-04
Loss = 4.5344e-03, PNorm = 185.3381, GNorm = 0.0585, lr_0 = 1.2460e-04
Loss = 1.3664e-03, PNorm = 185.3384, GNorm = 0.0955, lr_0 = 1.2451e-04
Loss = 1.8088e-03, PNorm = 185.3401, GNorm = 0.0872, lr_0 = 1.2443e-04
Loss = 2.0098e-03, PNorm = 185.3424, GNorm = 0.1442, lr_0 = 1.2434e-04
Loss = 4.8245e-03, PNorm = 185.3434, GNorm = 0.1007, lr_0 = 1.2426e-04
Loss = 3.6020e-03, PNorm = 185.3443, GNorm = 0.0569, lr_0 = 1.2417e-04
Loss = 2.5194e-03, PNorm = 185.3464, GNorm = 0.0457, lr_0 = 1.2409e-04
Loss = 1.9607e-03, PNorm = 185.3483, GNorm = 0.1136, lr_0 = 1.2400e-04
Loss = 5.5562e-03, PNorm = 185.3498, GNorm = 0.1204, lr_0 = 1.2392e-04
Loss = 3.7816e-03, PNorm = 185.3516, GNorm = 0.1004, lr_0 = 1.2383e-04
Loss = 3.1041e-03, PNorm = 185.3523, GNorm = 0.0862, lr_0 = 1.2375e-04
Loss = 2.7682e-03, PNorm = 185.3548, GNorm = 0.0713, lr_0 = 1.2366e-04
Loss = 1.7047e-03, PNorm = 185.3557, GNorm = 0.0495, lr_0 = 1.2358e-04
Loss = 2.6059e-03, PNorm = 185.3567, GNorm = 0.1317, lr_0 = 1.2349e-04
Loss = 1.0511e-03, PNorm = 185.3578, GNorm = 0.1007, lr_0 = 1.2341e-04
Loss = 2.0108e-03, PNorm = 185.3595, GNorm = 0.0484, lr_0 = 1.2332e-04
Loss = 1.8216e-03, PNorm = 185.3620, GNorm = 0.0776, lr_0 = 1.2324e-04
Loss = 2.2399e-03, PNorm = 185.3626, GNorm = 0.1235, lr_0 = 1.2315e-04
Loss = 2.5108e-03, PNorm = 185.3649, GNorm = 0.1073, lr_0 = 1.2307e-04
Loss = 2.1018e-03, PNorm = 185.3668, GNorm = 0.1497, lr_0 = 1.2298e-04
Loss = 2.8325e-03, PNorm = 185.3685, GNorm = 0.1475, lr_0 = 1.2290e-04
Loss = 1.5393e-03, PNorm = 185.3689, GNorm = 0.0749, lr_0 = 1.2282e-04
Loss = 5.3012e-03, PNorm = 185.3707, GNorm = 0.0887, lr_0 = 1.2273e-04
Loss = 5.2186e-03, PNorm = 185.3715, GNorm = 0.1525, lr_0 = 1.2265e-04
Loss = 1.4852e-03, PNorm = 185.3714, GNorm = 0.3023, lr_0 = 1.2256e-04
Loss = 3.4405e-03, PNorm = 185.3735, GNorm = 0.1505, lr_0 = 1.2248e-04
Loss = 2.2358e-03, PNorm = 185.3747, GNorm = 0.1375, lr_0 = 1.2240e-04
Loss = 7.4243e-03, PNorm = 185.3762, GNorm = 0.0883, lr_0 = 1.2231e-04
Loss = 2.8060e-03, PNorm = 185.3786, GNorm = 0.1109, lr_0 = 1.2223e-04
Loss = 6.1696e-03, PNorm = 185.3801, GNorm = 0.0864, lr_0 = 1.2214e-04
Loss = 3.5236e-03, PNorm = 185.3802, GNorm = 0.1172, lr_0 = 1.2206e-04
Loss = 1.9932e-03, PNorm = 185.3802, GNorm = 0.1424, lr_0 = 1.2198e-04
Loss = 1.1135e-03, PNorm = 185.3806, GNorm = 0.0556, lr_0 = 1.2189e-04
Loss = 5.2263e-03, PNorm = 185.3823, GNorm = 0.1006, lr_0 = 1.2181e-04
Loss = 3.5218e-03, PNorm = 185.3835, GNorm = 0.0425, lr_0 = 1.2173e-04
Loss = 2.8219e-03, PNorm = 185.3843, GNorm = 0.2799, lr_0 = 1.2164e-04
Loss = 1.5292e-03, PNorm = 185.3843, GNorm = 0.1401, lr_0 = 1.2156e-04
Loss = 2.2090e-03, PNorm = 185.3856, GNorm = 0.2413, lr_0 = 1.2148e-04
Loss = 9.2502e-03, PNorm = 185.3881, GNorm = 0.0525, lr_0 = 1.2139e-04
Loss = 1.4143e-03, PNorm = 185.3891, GNorm = 0.0959, lr_0 = 1.2131e-04
Loss = 1.5593e-03, PNorm = 185.3922, GNorm = 0.1283, lr_0 = 1.2123e-04
Loss = 1.4238e-03, PNorm = 185.3938, GNorm = 0.0492, lr_0 = 1.2114e-04
Loss = 8.5560e-03, PNorm = 185.3938, GNorm = 1.2629, lr_0 = 1.2106e-04
Loss = 1.3620e-03, PNorm = 185.3948, GNorm = 0.0709, lr_0 = 1.2098e-04
Loss = 2.1819e-03, PNorm = 185.3966, GNorm = 0.1883, lr_0 = 1.2090e-04
Loss = 2.2217e-03, PNorm = 185.3989, GNorm = 0.1518, lr_0 = 1.2081e-04
Loss = 1.5006e-03, PNorm = 185.3999, GNorm = 0.2030, lr_0 = 1.2073e-04
Loss = 3.4676e-03, PNorm = 185.4005, GNorm = 0.0617, lr_0 = 1.2065e-04
Loss = 3.2682e-03, PNorm = 185.4016, GNorm = 0.0663, lr_0 = 1.2056e-04
Loss = 3.3101e-03, PNorm = 185.4036, GNorm = 0.3712, lr_0 = 1.2048e-04
Loss = 1.6216e-03, PNorm = 185.4067, GNorm = 0.0913, lr_0 = 1.2040e-04
Loss = 3.3995e-03, PNorm = 185.4080, GNorm = 0.1893, lr_0 = 1.2032e-04
Loss = 3.3159e-03, PNorm = 185.4089, GNorm = 0.0729, lr_0 = 1.2023e-04
Loss = 7.0256e-03, PNorm = 185.4111, GNorm = 0.0512, lr_0 = 1.2015e-04
Loss = 5.1049e-03, PNorm = 185.4122, GNorm = 0.0767, lr_0 = 1.2007e-04
Loss = 4.2143e-03, PNorm = 185.4148, GNorm = 0.2105, lr_0 = 1.1999e-04
Loss = 1.4482e-03, PNorm = 185.4172, GNorm = 0.1671, lr_0 = 1.1991e-04
Loss = 2.3776e-03, PNorm = 185.4187, GNorm = 0.0673, lr_0 = 1.1982e-04
Loss = 1.6978e-03, PNorm = 185.4218, GNorm = 0.1438, lr_0 = 1.1974e-04
Loss = 2.1835e-03, PNorm = 185.4247, GNorm = 0.1341, lr_0 = 1.1966e-04
Loss = 2.2197e-03, PNorm = 185.4267, GNorm = 0.1650, lr_0 = 1.1958e-04
Loss = 5.1987e-03, PNorm = 185.4293, GNorm = 0.0604, lr_0 = 1.1950e-04
Loss = 6.3916e-03, PNorm = 185.4320, GNorm = 0.0868, lr_0 = 1.1941e-04
Loss = 3.8938e-03, PNorm = 185.4343, GNorm = 0.1155, lr_0 = 1.1933e-04
Loss = 3.4100e-03, PNorm = 185.4374, GNorm = 0.0959, lr_0 = 1.1925e-04
Loss = 4.5234e-03, PNorm = 185.4404, GNorm = 0.2630, lr_0 = 1.1917e-04
Loss = 2.9968e-03, PNorm = 185.4429, GNorm = 0.1189, lr_0 = 1.1909e-04
Loss = 1.5421e-03, PNorm = 185.4453, GNorm = 0.0798, lr_0 = 1.1901e-04
Loss = 1.0348e-02, PNorm = 185.4478, GNorm = 0.1957, lr_0 = 1.1892e-04
Loss = 1.3456e-03, PNorm = 185.4498, GNorm = 0.1077, lr_0 = 1.1884e-04
Loss = 2.6137e-03, PNorm = 185.4514, GNorm = 0.1209, lr_0 = 1.1876e-04
Loss = 1.3309e-03, PNorm = 185.4533, GNorm = 0.1047, lr_0 = 1.1868e-04
Loss = 5.4969e-03, PNorm = 185.4543, GNorm = 0.1343, lr_0 = 1.1860e-04
Loss = 7.4633e-03, PNorm = 185.4551, GNorm = 0.1670, lr_0 = 1.1852e-04
Loss = 3.6186e-03, PNorm = 185.4563, GNorm = 0.0717, lr_0 = 1.1844e-04
Loss = 3.7143e-03, PNorm = 185.4581, GNorm = 0.1075, lr_0 = 1.1835e-04
Loss = 1.5707e-03, PNorm = 185.4600, GNorm = 0.1171, lr_0 = 1.1827e-04
Loss = 7.3296e-03, PNorm = 185.4602, GNorm = 0.1407, lr_0 = 1.1819e-04
Loss = 7.8830e-03, PNorm = 185.4615, GNorm = 0.1304, lr_0 = 1.1811e-04
Loss = 1.3921e-03, PNorm = 185.4643, GNorm = 0.0917, lr_0 = 1.1803e-04
Loss = 2.8696e-03, PNorm = 185.4672, GNorm = 0.0669, lr_0 = 1.1795e-04
Loss = 1.0291e-03, PNorm = 185.4696, GNorm = 0.0898, lr_0 = 1.1787e-04
Validation mae = 0.120948
Epoch 28
Loss = 1.7201e-03, PNorm = 185.4717, GNorm = 0.0979, lr_0 = 1.1779e-04
Loss = 1.1076e-03, PNorm = 185.4737, GNorm = 0.1029, lr_0 = 1.1771e-04
Loss = 2.1960e-03, PNorm = 185.4750, GNorm = 0.0857, lr_0 = 1.1763e-04
Loss = 4.7979e-03, PNorm = 185.4766, GNorm = 0.0478, lr_0 = 1.1755e-04
Loss = 2.1601e-03, PNorm = 185.4782, GNorm = 0.0737, lr_0 = 1.1747e-04
Loss = 2.7576e-03, PNorm = 185.4791, GNorm = 0.1043, lr_0 = 1.1739e-04
Loss = 6.8425e-03, PNorm = 185.4804, GNorm = 0.4607, lr_0 = 1.1730e-04
Loss = 2.3133e-03, PNorm = 185.4804, GNorm = 0.1230, lr_0 = 1.1722e-04
Loss = 2.5641e-03, PNorm = 185.4806, GNorm = 0.1862, lr_0 = 1.1714e-04
Loss = 3.4546e-03, PNorm = 185.4816, GNorm = 0.0418, lr_0 = 1.1706e-04
Loss = 1.8362e-03, PNorm = 185.4826, GNorm = 0.1552, lr_0 = 1.1698e-04
Loss = 2.1742e-03, PNorm = 185.4832, GNorm = 0.0828, lr_0 = 1.1690e-04
Loss = 5.9568e-03, PNorm = 185.4848, GNorm = 0.0686, lr_0 = 1.1682e-04
Loss = 2.9962e-03, PNorm = 185.4858, GNorm = 0.1693, lr_0 = 1.1674e-04
Loss = 1.4177e-03, PNorm = 185.4858, GNorm = 0.1169, lr_0 = 1.1666e-04
Loss = 3.7798e-03, PNorm = 185.4870, GNorm = 0.1390, lr_0 = 1.1658e-04
Loss = 6.6550e-03, PNorm = 185.4879, GNorm = 0.3081, lr_0 = 1.1650e-04
Loss = 2.2766e-03, PNorm = 185.4887, GNorm = 0.0812, lr_0 = 1.1642e-04
Loss = 4.1764e-03, PNorm = 185.4900, GNorm = 0.0380, lr_0 = 1.1634e-04
Loss = 1.2187e-03, PNorm = 185.4910, GNorm = 0.0918, lr_0 = 1.1626e-04
Loss = 1.0948e-03, PNorm = 185.4935, GNorm = 0.0458, lr_0 = 1.1618e-04
Loss = 2.9624e-03, PNorm = 185.4949, GNorm = 0.2006, lr_0 = 1.1611e-04
Loss = 2.9373e-03, PNorm = 185.4954, GNorm = 0.0787, lr_0 = 1.1603e-04
Loss = 2.1073e-03, PNorm = 185.4963, GNorm = 0.1236, lr_0 = 1.1595e-04
Loss = 3.5442e-03, PNorm = 185.4969, GNorm = 0.0846, lr_0 = 1.1587e-04
Loss = 1.1816e-03, PNorm = 185.4985, GNorm = 0.0830, lr_0 = 1.1579e-04
Loss = 1.3395e-03, PNorm = 185.4997, GNorm = 0.1235, lr_0 = 1.1571e-04
Loss = 1.2328e-03, PNorm = 185.5012, GNorm = 0.0483, lr_0 = 1.1563e-04
Loss = 1.8161e-03, PNorm = 185.5027, GNorm = 0.1303, lr_0 = 1.1555e-04
Loss = 4.9526e-03, PNorm = 185.5042, GNorm = 0.1522, lr_0 = 1.1547e-04
Loss = 2.3721e-03, PNorm = 185.5051, GNorm = 0.0872, lr_0 = 1.1539e-04
Loss = 6.7553e-03, PNorm = 185.5065, GNorm = 0.0771, lr_0 = 1.1531e-04
Loss = 1.5891e-03, PNorm = 185.5075, GNorm = 0.1083, lr_0 = 1.1523e-04
Loss = 1.1353e-03, PNorm = 185.5085, GNorm = 0.1641, lr_0 = 1.1515e-04
Loss = 1.2546e-03, PNorm = 185.5096, GNorm = 0.0952, lr_0 = 1.1508e-04
Loss = 5.0861e-03, PNorm = 185.5113, GNorm = 0.0733, lr_0 = 1.1500e-04
Loss = 2.4603e-03, PNorm = 185.5135, GNorm = 0.0466, lr_0 = 1.1492e-04
Loss = 1.3697e-03, PNorm = 185.5155, GNorm = 0.1266, lr_0 = 1.1484e-04
Loss = 1.6368e-03, PNorm = 185.5178, GNorm = 0.1124, lr_0 = 1.1476e-04
Loss = 1.6853e-03, PNorm = 185.5192, GNorm = 0.1497, lr_0 = 1.1468e-04
Loss = 3.0003e-03, PNorm = 185.5214, GNorm = 0.0649, lr_0 = 1.1460e-04
Loss = 3.1120e-03, PNorm = 185.5240, GNorm = 0.0546, lr_0 = 1.1452e-04
Loss = 2.7074e-03, PNorm = 185.5254, GNorm = 0.1068, lr_0 = 1.1445e-04
Loss = 2.7309e-03, PNorm = 185.5252, GNorm = 0.0685, lr_0 = 1.1437e-04
Loss = 1.3096e-03, PNorm = 185.5260, GNorm = 0.0786, lr_0 = 1.1429e-04
Loss = 3.2645e-03, PNorm = 185.5277, GNorm = 0.0826, lr_0 = 1.1421e-04
Loss = 1.2979e-03, PNorm = 185.5287, GNorm = 0.0847, lr_0 = 1.1413e-04
Loss = 1.8742e-03, PNorm = 185.5285, GNorm = 0.0673, lr_0 = 1.1405e-04
Loss = 1.0619e-03, PNorm = 185.5285, GNorm = 0.1794, lr_0 = 1.1398e-04
Loss = 1.8713e-03, PNorm = 185.5297, GNorm = 0.2264, lr_0 = 1.1390e-04
Loss = 4.1887e-03, PNorm = 185.5302, GNorm = 0.0546, lr_0 = 1.1382e-04
Loss = 2.9044e-03, PNorm = 185.5318, GNorm = 0.1809, lr_0 = 1.1374e-04
Loss = 2.0704e-03, PNorm = 185.5342, GNorm = 0.0872, lr_0 = 1.1366e-04
Loss = 3.3002e-03, PNorm = 185.5360, GNorm = 0.0587, lr_0 = 1.1359e-04
Loss = 5.0157e-03, PNorm = 185.5381, GNorm = 0.4474, lr_0 = 1.1351e-04
Loss = 6.0645e-03, PNorm = 185.5402, GNorm = 0.0618, lr_0 = 1.1343e-04
Loss = 3.5414e-03, PNorm = 185.5417, GNorm = 0.0815, lr_0 = 1.1335e-04
Loss = 5.5221e-03, PNorm = 185.5431, GNorm = 0.1229, lr_0 = 1.1328e-04
Loss = 9.5699e-04, PNorm = 185.5446, GNorm = 0.0486, lr_0 = 1.1320e-04
Loss = 3.6179e-03, PNorm = 185.5461, GNorm = 0.1264, lr_0 = 1.1312e-04
Loss = 1.4916e-03, PNorm = 185.5477, GNorm = 0.0647, lr_0 = 1.1304e-04
Loss = 4.0066e-03, PNorm = 185.5480, GNorm = 0.1659, lr_0 = 1.1297e-04
Loss = 2.8174e-03, PNorm = 185.5499, GNorm = 0.0684, lr_0 = 1.1289e-04
Loss = 8.5164e-03, PNorm = 185.5525, GNorm = 1.4074, lr_0 = 1.1281e-04
Loss = 3.1882e-03, PNorm = 185.5532, GNorm = 0.1130, lr_0 = 1.1273e-04
Loss = 1.5412e-03, PNorm = 185.5553, GNorm = 0.0696, lr_0 = 1.1266e-04
Loss = 1.6513e-03, PNorm = 185.5570, GNorm = 0.0927, lr_0 = 1.1258e-04
Loss = 1.8343e-03, PNorm = 185.5589, GNorm = 0.1136, lr_0 = 1.1250e-04
Loss = 4.9901e-03, PNorm = 185.5606, GNorm = 0.0805, lr_0 = 1.1243e-04
Loss = 2.1489e-03, PNorm = 185.5622, GNorm = 0.1180, lr_0 = 1.1235e-04
Loss = 2.0437e-03, PNorm = 185.5639, GNorm = 0.0543, lr_0 = 1.1227e-04
Loss = 1.3910e-03, PNorm = 185.5664, GNorm = 0.0831, lr_0 = 1.1219e-04
Loss = 1.5629e-03, PNorm = 185.5685, GNorm = 0.3961, lr_0 = 1.1212e-04
Loss = 2.6174e-03, PNorm = 185.5699, GNorm = 0.0713, lr_0 = 1.1204e-04
Loss = 1.8070e-03, PNorm = 185.5712, GNorm = 0.2884, lr_0 = 1.1196e-04
Loss = 3.5236e-03, PNorm = 185.5740, GNorm = 1.4551, lr_0 = 1.1189e-04
Loss = 9.5415e-03, PNorm = 185.5779, GNorm = 0.0927, lr_0 = 1.1181e-04
Loss = 2.4445e-03, PNorm = 185.5783, GNorm = 0.1014, lr_0 = 1.1173e-04
Loss = 4.7311e-03, PNorm = 185.5793, GNorm = 0.1569, lr_0 = 1.1166e-04
Loss = 1.9552e-03, PNorm = 185.5807, GNorm = 0.0654, lr_0 = 1.1158e-04
Loss = 2.0872e-03, PNorm = 185.5813, GNorm = 0.0739, lr_0 = 1.1150e-04
Loss = 2.7884e-03, PNorm = 185.5825, GNorm = 0.1198, lr_0 = 1.1143e-04
Loss = 2.7536e-03, PNorm = 185.5836, GNorm = 0.0699, lr_0 = 1.1135e-04
Loss = 3.2739e-03, PNorm = 185.5840, GNorm = 0.1227, lr_0 = 1.1128e-04
Loss = 1.0551e-03, PNorm = 185.5851, GNorm = 0.0745, lr_0 = 1.1120e-04
Loss = 1.1962e-03, PNorm = 185.5858, GNorm = 0.1091, lr_0 = 1.1112e-04
Loss = 1.4491e-03, PNorm = 185.5851, GNorm = 0.0755, lr_0 = 1.1105e-04
Loss = 5.3115e-03, PNorm = 185.5868, GNorm = 0.0845, lr_0 = 1.1097e-04
Loss = 1.2062e-03, PNorm = 185.5887, GNorm = 0.0459, lr_0 = 1.1089e-04
Loss = 1.0182e-03, PNorm = 185.5901, GNorm = 0.0958, lr_0 = 1.1082e-04
Loss = 3.1649e-03, PNorm = 185.5924, GNorm = 0.0906, lr_0 = 1.1074e-04
Loss = 2.2986e-03, PNorm = 185.5938, GNorm = 0.1513, lr_0 = 1.1067e-04
Loss = 4.3794e-03, PNorm = 185.5947, GNorm = 0.0491, lr_0 = 1.1059e-04
Loss = 2.5126e-03, PNorm = 185.5958, GNorm = 0.4034, lr_0 = 1.1052e-04
Loss = 1.6750e-03, PNorm = 185.6001, GNorm = 0.3072, lr_0 = 1.1044e-04
Loss = 1.1043e-03, PNorm = 185.6005, GNorm = 0.1305, lr_0 = 1.1036e-04
Loss = 4.9464e-03, PNorm = 185.6007, GNorm = 0.1716, lr_0 = 1.1029e-04
Loss = 1.5158e-03, PNorm = 185.6014, GNorm = 0.0814, lr_0 = 1.1021e-04
Loss = 2.0462e-03, PNorm = 185.6016, GNorm = 0.1909, lr_0 = 1.1014e-04
Loss = 4.2486e-03, PNorm = 185.6011, GNorm = 0.1607, lr_0 = 1.1006e-04
Loss = 2.4059e-03, PNorm = 185.6023, GNorm = 0.0568, lr_0 = 1.0999e-04
Loss = 1.2725e-03, PNorm = 185.6045, GNorm = 0.1928, lr_0 = 1.0991e-04
Loss = 2.6761e-03, PNorm = 185.6072, GNorm = 0.1515, lr_0 = 1.0984e-04
Loss = 1.7643e-02, PNorm = 185.6090, GNorm = 0.1757, lr_0 = 1.0976e-04
Loss = 2.5228e-03, PNorm = 185.6092, GNorm = 0.1900, lr_0 = 1.0969e-04
Loss = 3.6396e-03, PNorm = 185.6089, GNorm = 0.2963, lr_0 = 1.0961e-04
Loss = 2.1006e-03, PNorm = 185.6112, GNorm = 0.0809, lr_0 = 1.0954e-04
Loss = 1.5024e-03, PNorm = 185.6113, GNorm = 0.0937, lr_0 = 1.0946e-04
Loss = 3.5064e-03, PNorm = 185.6124, GNorm = 0.1061, lr_0 = 1.0939e-04
Loss = 1.2598e-03, PNorm = 185.6143, GNorm = 0.0957, lr_0 = 1.0931e-04
Loss = 4.8572e-03, PNorm = 185.6159, GNorm = 0.0848, lr_0 = 1.0924e-04
Loss = 5.6898e-03, PNorm = 185.6171, GNorm = 0.1642, lr_0 = 1.0916e-04
Loss = 3.0824e-03, PNorm = 185.6177, GNorm = 0.1656, lr_0 = 1.0909e-04
Loss = 1.0072e-03, PNorm = 185.6197, GNorm = 0.0803, lr_0 = 1.0901e-04
Loss = 5.5804e-03, PNorm = 185.6211, GNorm = 0.0459, lr_0 = 1.0894e-04
Loss = 2.3307e-03, PNorm = 185.6218, GNorm = 0.2599, lr_0 = 1.0886e-04
Loss = 1.3761e-03, PNorm = 185.6235, GNorm = 0.0458, lr_0 = 1.0879e-04
Loss = 6.7110e-03, PNorm = 185.6251, GNorm = 0.0869, lr_0 = 1.0871e-04
Loss = 9.1294e-03, PNorm = 185.6258, GNorm = 0.2347, lr_0 = 1.0864e-04
Loss = 2.5048e-03, PNorm = 185.6262, GNorm = 0.0724, lr_0 = 1.0856e-04
Validation mae = 0.120709
Epoch 29
Loss = 2.5884e-03, PNorm = 185.6285, GNorm = 0.1038, lr_0 = 1.0849e-04
Loss = 2.0789e-03, PNorm = 185.6307, GNorm = 0.0838, lr_0 = 1.0841e-04
Loss = 1.7653e-03, PNorm = 185.6317, GNorm = 0.0479, lr_0 = 1.0834e-04
Loss = 2.5681e-03, PNorm = 185.6317, GNorm = 0.2395, lr_0 = 1.0827e-04
Loss = 2.4782e-03, PNorm = 185.6355, GNorm = 0.0912, lr_0 = 1.0819e-04
Loss = 1.3829e-03, PNorm = 185.6378, GNorm = 0.1373, lr_0 = 1.0812e-04
Loss = 2.6914e-03, PNorm = 185.6391, GNorm = 0.5989, lr_0 = 1.0804e-04
Loss = 3.4501e-03, PNorm = 185.6402, GNorm = 0.1851, lr_0 = 1.0797e-04
Loss = 1.1416e-03, PNorm = 185.6428, GNorm = 0.1541, lr_0 = 1.0790e-04
Loss = 4.7455e-03, PNorm = 185.6444, GNorm = 0.0735, lr_0 = 1.0782e-04
Loss = 2.3995e-03, PNorm = 185.6466, GNorm = 0.0372, lr_0 = 1.0775e-04
Loss = 1.3802e-03, PNorm = 185.6487, GNorm = 0.1316, lr_0 = 1.0767e-04
Loss = 1.8942e-03, PNorm = 185.6497, GNorm = 0.0517, lr_0 = 1.0760e-04
Loss = 1.7131e-03, PNorm = 185.6502, GNorm = 0.0356, lr_0 = 1.0753e-04
Loss = 1.8570e-03, PNorm = 185.6504, GNorm = 0.1043, lr_0 = 1.0745e-04
Loss = 2.9854e-03, PNorm = 185.6511, GNorm = 0.0466, lr_0 = 1.0738e-04
Loss = 1.4871e-03, PNorm = 185.6521, GNorm = 0.0685, lr_0 = 1.0731e-04
Loss = 5.4885e-03, PNorm = 185.6526, GNorm = 0.0764, lr_0 = 1.0723e-04
Loss = 1.3699e-03, PNorm = 185.6538, GNorm = 0.0725, lr_0 = 1.0716e-04
Loss = 7.4783e-03, PNorm = 185.6559, GNorm = 0.0695, lr_0 = 1.0709e-04
Loss = 3.8408e-03, PNorm = 185.6579, GNorm = 0.0720, lr_0 = 1.0701e-04
Loss = 5.0771e-03, PNorm = 185.6601, GNorm = 0.0753, lr_0 = 1.0694e-04
Loss = 3.4508e-03, PNorm = 185.6624, GNorm = 0.0704, lr_0 = 1.0687e-04
Loss = 1.7448e-03, PNorm = 185.6632, GNorm = 0.0733, lr_0 = 1.0679e-04
Loss = 1.1679e-02, PNorm = 185.6639, GNorm = 2.7412, lr_0 = 1.0672e-04
Loss = 1.7309e-03, PNorm = 185.6660, GNorm = 0.1590, lr_0 = 1.0665e-04
Loss = 1.0028e-03, PNorm = 185.6659, GNorm = 0.1092, lr_0 = 1.0657e-04
Loss = 1.1644e-03, PNorm = 185.6676, GNorm = 0.0740, lr_0 = 1.0650e-04
Loss = 1.0065e-03, PNorm = 185.6672, GNorm = 0.0434, lr_0 = 1.0643e-04
Loss = 1.6984e-03, PNorm = 185.6674, GNorm = 0.1103, lr_0 = 1.0635e-04
Loss = 2.1565e-03, PNorm = 185.6678, GNorm = 0.5179, lr_0 = 1.0628e-04
Loss = 3.3645e-03, PNorm = 185.6693, GNorm = 0.1143, lr_0 = 1.0621e-04
Loss = 1.0529e-03, PNorm = 185.6702, GNorm = 0.0967, lr_0 = 1.0614e-04
Loss = 2.1774e-03, PNorm = 185.6703, GNorm = 0.0723, lr_0 = 1.0606e-04
Loss = 7.8540e-03, PNorm = 185.6699, GNorm = 0.0990, lr_0 = 1.0599e-04
Loss = 5.8006e-03, PNorm = 185.6714, GNorm = 0.0743, lr_0 = 1.0592e-04
Loss = 2.4578e-03, PNorm = 185.6731, GNorm = 0.1116, lr_0 = 1.0585e-04
Loss = 1.0902e-03, PNorm = 185.6736, GNorm = 0.1070, lr_0 = 1.0577e-04
Loss = 2.0199e-03, PNorm = 185.6740, GNorm = 0.0641, lr_0 = 1.0570e-04
Loss = 1.1271e-03, PNorm = 185.6755, GNorm = 0.1486, lr_0 = 1.0563e-04
Loss = 3.5944e-03, PNorm = 185.6774, GNorm = 0.2599, lr_0 = 1.0556e-04
Loss = 3.9870e-03, PNorm = 185.6782, GNorm = 0.1748, lr_0 = 1.0548e-04
Loss = 2.2812e-03, PNorm = 185.6792, GNorm = 0.0773, lr_0 = 1.0541e-04
Loss = 3.4506e-03, PNorm = 185.6808, GNorm = 0.1776, lr_0 = 1.0534e-04
Loss = 3.9762e-03, PNorm = 185.6821, GNorm = 0.1050, lr_0 = 1.0527e-04
Loss = 5.8220e-03, PNorm = 185.6831, GNorm = 0.0912, lr_0 = 1.0519e-04
Loss = 3.6021e-03, PNorm = 185.6850, GNorm = 0.0386, lr_0 = 1.0512e-04
Loss = 3.4188e-03, PNorm = 185.6857, GNorm = 0.3920, lr_0 = 1.0505e-04
Loss = 5.5047e-03, PNorm = 185.6862, GNorm = 0.2919, lr_0 = 1.0498e-04
Loss = 3.1417e-03, PNorm = 185.6868, GNorm = 0.1559, lr_0 = 1.0491e-04
Loss = 2.0816e-03, PNorm = 185.6873, GNorm = 0.0449, lr_0 = 1.0483e-04
Loss = 4.3443e-03, PNorm = 185.6895, GNorm = 0.1101, lr_0 = 1.0476e-04
Loss = 1.0637e-03, PNorm = 185.6914, GNorm = 0.1643, lr_0 = 1.0469e-04
Loss = 5.2423e-03, PNorm = 185.6934, GNorm = 0.0659, lr_0 = 1.0462e-04
Loss = 2.8507e-03, PNorm = 185.6950, GNorm = 0.0942, lr_0 = 1.0455e-04
Loss = 2.0281e-03, PNorm = 185.6960, GNorm = 0.0366, lr_0 = 1.0448e-04
Loss = 5.2290e-03, PNorm = 185.6977, GNorm = 1.1228, lr_0 = 1.0440e-04
Loss = 1.5006e-03, PNorm = 185.6995, GNorm = 0.0595, lr_0 = 1.0433e-04
Loss = 3.2018e-03, PNorm = 185.7015, GNorm = 0.0537, lr_0 = 1.0426e-04
Loss = 2.8491e-03, PNorm = 185.7036, GNorm = 0.0471, lr_0 = 1.0419e-04
Loss = 1.9087e-03, PNorm = 185.7051, GNorm = 0.0814, lr_0 = 1.0412e-04
Loss = 2.1148e-03, PNorm = 185.7062, GNorm = 0.1490, lr_0 = 1.0405e-04
Loss = 1.7722e-03, PNorm = 185.7075, GNorm = 0.0854, lr_0 = 1.0398e-04
Loss = 1.1466e-03, PNorm = 185.7089, GNorm = 0.0698, lr_0 = 1.0391e-04
Loss = 2.2929e-03, PNorm = 185.7097, GNorm = 0.0895, lr_0 = 1.0383e-04
Loss = 2.3810e-03, PNorm = 185.7116, GNorm = 0.1901, lr_0 = 1.0376e-04
Loss = 1.6373e-03, PNorm = 185.7130, GNorm = 0.0394, lr_0 = 1.0369e-04
Loss = 4.6789e-03, PNorm = 185.7134, GNorm = 0.0404, lr_0 = 1.0362e-04
Loss = 3.5761e-03, PNorm = 185.7143, GNorm = 0.3292, lr_0 = 1.0355e-04
Loss = 2.7242e-03, PNorm = 185.7152, GNorm = 0.1459, lr_0 = 1.0348e-04
Loss = 1.0907e-03, PNorm = 185.7161, GNorm = 0.0991, lr_0 = 1.0341e-04
Loss = 1.1040e-03, PNorm = 185.7174, GNorm = 0.1417, lr_0 = 1.0334e-04
Loss = 1.7893e-03, PNorm = 185.7185, GNorm = 0.1943, lr_0 = 1.0327e-04
Loss = 2.2143e-03, PNorm = 185.7191, GNorm = 0.0386, lr_0 = 1.0320e-04
Loss = 1.2474e-03, PNorm = 185.7199, GNorm = 0.0590, lr_0 = 1.0312e-04
Loss = 5.0065e-03, PNorm = 185.7209, GNorm = 0.0386, lr_0 = 1.0305e-04
Loss = 2.9425e-03, PNorm = 185.7217, GNorm = 0.0470, lr_0 = 1.0298e-04
Loss = 3.1665e-03, PNorm = 185.7231, GNorm = 0.1216, lr_0 = 1.0291e-04
Loss = 3.8026e-03, PNorm = 185.7240, GNorm = 0.0430, lr_0 = 1.0284e-04
Loss = 8.6539e-04, PNorm = 185.7245, GNorm = 0.1071, lr_0 = 1.0277e-04
Loss = 9.8421e-03, PNorm = 185.7256, GNorm = 0.0950, lr_0 = 1.0270e-04
Loss = 2.3045e-03, PNorm = 185.7268, GNorm = 0.1608, lr_0 = 1.0263e-04
Loss = 3.6805e-03, PNorm = 185.7271, GNorm = 0.0483, lr_0 = 1.0256e-04
Loss = 1.0513e-03, PNorm = 185.7275, GNorm = 0.0995, lr_0 = 1.0249e-04
Loss = 3.7747e-03, PNorm = 185.7293, GNorm = 0.0977, lr_0 = 1.0242e-04
Loss = 2.1052e-03, PNorm = 185.7304, GNorm = 0.0454, lr_0 = 1.0235e-04
Loss = 7.9606e-04, PNorm = 185.7320, GNorm = 0.0679, lr_0 = 1.0228e-04
Loss = 6.0104e-03, PNorm = 185.7329, GNorm = 0.0768, lr_0 = 1.0221e-04
Loss = 2.1778e-03, PNorm = 185.7345, GNorm = 0.1110, lr_0 = 1.0214e-04
Loss = 2.2919e-03, PNorm = 185.7355, GNorm = 0.0627, lr_0 = 1.0207e-04
Loss = 1.1641e-03, PNorm = 185.7371, GNorm = 0.0790, lr_0 = 1.0200e-04
Loss = 4.5144e-03, PNorm = 185.7383, GNorm = 0.1427, lr_0 = 1.0193e-04
Loss = 1.0639e-03, PNorm = 185.7394, GNorm = 0.0969, lr_0 = 1.0186e-04
Loss = 4.2675e-03, PNorm = 185.7402, GNorm = 0.0944, lr_0 = 1.0179e-04
Loss = 1.6204e-03, PNorm = 185.7407, GNorm = 0.0379, lr_0 = 1.0172e-04
Loss = 3.1543e-03, PNorm = 185.7423, GNorm = 0.1438, lr_0 = 1.0165e-04
Loss = 1.5518e-03, PNorm = 185.7430, GNorm = 0.0624, lr_0 = 1.0158e-04
Loss = 1.8837e-03, PNorm = 185.7450, GNorm = 0.1590, lr_0 = 1.0151e-04
Loss = 5.4539e-03, PNorm = 185.7467, GNorm = 0.1060, lr_0 = 1.0144e-04
Loss = 1.6382e-03, PNorm = 185.7473, GNorm = 0.0895, lr_0 = 1.0137e-04
Loss = 3.6975e-03, PNorm = 185.7479, GNorm = 0.0628, lr_0 = 1.0130e-04
Loss = 3.3040e-03, PNorm = 185.7494, GNorm = 0.0948, lr_0 = 1.0123e-04
Loss = 3.3357e-03, PNorm = 185.7505, GNorm = 0.1551, lr_0 = 1.0116e-04
Loss = 1.2845e-03, PNorm = 185.7502, GNorm = 0.0981, lr_0 = 1.0110e-04
Loss = 1.7451e-03, PNorm = 185.7509, GNorm = 0.0922, lr_0 = 1.0103e-04
Loss = 1.6703e-03, PNorm = 185.7518, GNorm = 0.1062, lr_0 = 1.0096e-04
Loss = 3.7627e-03, PNorm = 185.7525, GNorm = 0.0799, lr_0 = 1.0089e-04
Loss = 1.0424e-03, PNorm = 185.7535, GNorm = 0.1150, lr_0 = 1.0082e-04
Loss = 5.6198e-03, PNorm = 185.7549, GNorm = 0.0367, lr_0 = 1.0075e-04
Loss = 1.3692e-03, PNorm = 185.7563, GNorm = 0.0455, lr_0 = 1.0068e-04
Loss = 2.5626e-03, PNorm = 185.7579, GNorm = 0.2029, lr_0 = 1.0061e-04
Loss = 2.7999e-03, PNorm = 185.7588, GNorm = 0.3103, lr_0 = 1.0054e-04
Loss = 3.9948e-03, PNorm = 185.7601, GNorm = 0.0669, lr_0 = 1.0047e-04
Loss = 3.2180e-03, PNorm = 185.7602, GNorm = 0.0985, lr_0 = 1.0041e-04
Loss = 3.6546e-03, PNorm = 185.7609, GNorm = 0.1397, lr_0 = 1.0034e-04
Loss = 1.3869e-03, PNorm = 185.7620, GNorm = 0.0778, lr_0 = 1.0027e-04
Loss = 1.4463e-03, PNorm = 185.7626, GNorm = 0.1154, lr_0 = 1.0020e-04
Loss = 1.4761e-03, PNorm = 185.7643, GNorm = 0.0444, lr_0 = 1.0013e-04
Loss = 2.6758e-03, PNorm = 185.7664, GNorm = 0.2250, lr_0 = 1.0006e-04
Loss = 2.7636e-03, PNorm = 185.7672, GNorm = 0.0622, lr_0 = 1.0000e-04
Validation mae = 0.120850
Model 0 best validation mae = 0.120709 on epoch 28
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.119594
Ensemble test mae = 0.119594
Fold 8
Splitting data with seed 8
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN()
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=2048, out_features=2100, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=2100, out_features=2100, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.0, inplace=False)
    (7): Linear(in_features=2100, out_features=1, bias=True)
  )
)
Number of parameters = 8,717,101
Moving model to cuda
Epoch 0
Loss = 9.6844e-01, PNorm = 64.6287, GNorm = 2.8268, lr_0 = 1.0413e-04
Loss = 8.4848e-01, PNorm = 64.6398, GNorm = 1.8321, lr_0 = 1.0788e-04
Loss = 7.4676e-01, PNorm = 64.6507, GNorm = 3.1639, lr_0 = 1.1163e-04
Loss = 7.5486e-01, PNorm = 64.6606, GNorm = 4.3647, lr_0 = 1.1537e-04
Loss = 6.6395e-01, PNorm = 64.6700, GNorm = 1.8588, lr_0 = 1.1913e-04
Loss = 6.9291e-01, PNorm = 64.6788, GNorm = 2.2207, lr_0 = 1.2287e-04
Loss = 6.8472e-01, PNorm = 64.6880, GNorm = 2.4792, lr_0 = 1.2663e-04
Loss = 6.6639e-01, PNorm = 64.6987, GNorm = 2.7035, lr_0 = 1.3038e-04
Loss = 6.5128e-01, PNorm = 64.7075, GNorm = 2.4035, lr_0 = 1.3413e-04
Loss = 6.9733e-01, PNorm = 64.7170, GNorm = 2.6333, lr_0 = 1.3788e-04
Loss = 6.6329e-01, PNorm = 64.7290, GNorm = 2.6738, lr_0 = 1.4163e-04
Loss = 6.3487e-01, PNorm = 64.7398, GNorm = 2.3387, lr_0 = 1.4537e-04
Loss = 6.9593e-01, PNorm = 64.7511, GNorm = 2.2133, lr_0 = 1.4913e-04
Loss = 6.5909e-01, PNorm = 64.7633, GNorm = 3.0123, lr_0 = 1.5288e-04
Loss = 6.6560e-01, PNorm = 64.7762, GNorm = 3.8003, lr_0 = 1.5662e-04
Loss = 6.2349e-01, PNorm = 64.7896, GNorm = 2.3182, lr_0 = 1.6038e-04
Loss = 6.8642e-01, PNorm = 64.8009, GNorm = 2.5392, lr_0 = 1.6412e-04
Loss = 6.3434e-01, PNorm = 64.8130, GNorm = 2.2650, lr_0 = 1.6788e-04
Loss = 5.9422e-01, PNorm = 64.8251, GNorm = 2.4649, lr_0 = 1.7163e-04
Loss = 5.7430e-01, PNorm = 64.8373, GNorm = 2.4399, lr_0 = 1.7538e-04
Loss = 5.4564e-01, PNorm = 64.8495, GNorm = 1.8942, lr_0 = 1.7913e-04
Loss = 5.8816e-01, PNorm = 64.8647, GNorm = 1.8988, lr_0 = 1.8288e-04
Loss = 6.3584e-01, PNorm = 64.8801, GNorm = 2.7486, lr_0 = 1.8662e-04
Loss = 6.1435e-01, PNorm = 64.8949, GNorm = 1.9799, lr_0 = 1.9038e-04
Loss = 7.7964e-01, PNorm = 64.9127, GNorm = 5.1660, lr_0 = 1.9413e-04
Loss = 6.4313e-01, PNorm = 64.9307, GNorm = 2.5416, lr_0 = 1.9788e-04
Loss = 6.5036e-01, PNorm = 64.9482, GNorm = 1.8083, lr_0 = 2.0163e-04
Loss = 6.1057e-01, PNorm = 64.9652, GNorm = 3.4115, lr_0 = 2.0537e-04
Loss = 5.3037e-01, PNorm = 64.9819, GNorm = 2.5107, lr_0 = 2.0913e-04
Loss = 5.9774e-01, PNorm = 64.9979, GNorm = 1.9714, lr_0 = 2.1288e-04
Loss = 6.2212e-01, PNorm = 65.0153, GNorm = 3.3881, lr_0 = 2.1663e-04
Loss = 6.3790e-01, PNorm = 65.0339, GNorm = 2.2025, lr_0 = 2.2038e-04
Loss = 4.7201e-01, PNorm = 65.0550, GNorm = 2.0922, lr_0 = 2.2412e-04
Loss = 6.1188e-01, PNorm = 65.0712, GNorm = 1.3181, lr_0 = 2.2787e-04
Loss = 4.8150e-01, PNorm = 65.0867, GNorm = 1.6650, lr_0 = 2.3163e-04
Loss = 5.0908e-01, PNorm = 65.1049, GNorm = 3.0620, lr_0 = 2.3538e-04
Loss = 5.7182e-01, PNorm = 65.1205, GNorm = 2.0306, lr_0 = 2.3913e-04
Loss = 5.0127e-01, PNorm = 65.1372, GNorm = 1.4960, lr_0 = 2.4288e-04
Loss = 5.7961e-01, PNorm = 65.1572, GNorm = 2.2293, lr_0 = 2.4662e-04
Loss = 5.3073e-01, PNorm = 65.1773, GNorm = 1.5334, lr_0 = 2.5038e-04
Loss = 5.8549e-01, PNorm = 65.1997, GNorm = 1.8461, lr_0 = 2.5413e-04
Loss = 7.1977e-01, PNorm = 65.2226, GNorm = 1.6745, lr_0 = 2.5788e-04
Loss = 6.3984e-01, PNorm = 65.2488, GNorm = 3.2278, lr_0 = 2.6163e-04
Loss = 4.8257e-01, PNorm = 65.2719, GNorm = 1.5822, lr_0 = 2.6537e-04
Loss = 5.0879e-01, PNorm = 65.2931, GNorm = 2.0663, lr_0 = 2.6912e-04
Loss = 5.3538e-01, PNorm = 65.3107, GNorm = 1.2445, lr_0 = 2.7288e-04
Loss = 5.4970e-01, PNorm = 65.3339, GNorm = 1.5744, lr_0 = 2.7663e-04
Loss = 5.8799e-01, PNorm = 65.3558, GNorm = 1.4678, lr_0 = 2.8038e-04
Loss = 5.3684e-01, PNorm = 65.3776, GNorm = 1.8356, lr_0 = 2.8413e-04
Loss = 5.2467e-01, PNorm = 65.4018, GNorm = 2.3326, lr_0 = 2.8787e-04
Loss = 5.3811e-01, PNorm = 65.4248, GNorm = 1.3156, lr_0 = 2.9163e-04
Loss = 5.8349e-01, PNorm = 65.4479, GNorm = 1.6104, lr_0 = 2.9538e-04
Loss = 5.2938e-01, PNorm = 65.4759, GNorm = 1.5991, lr_0 = 2.9913e-04
Loss = 5.5128e-01, PNorm = 65.5032, GNorm = 1.8309, lr_0 = 3.0288e-04
Loss = 5.3182e-01, PNorm = 65.5290, GNorm = 1.9348, lr_0 = 3.0662e-04
Loss = 4.5495e-01, PNorm = 65.5573, GNorm = 1.9204, lr_0 = 3.1037e-04
Loss = 5.2238e-01, PNorm = 65.5841, GNorm = 1.5112, lr_0 = 3.1413e-04
Loss = 4.9318e-01, PNorm = 65.6100, GNorm = 2.1329, lr_0 = 3.1788e-04
Loss = 5.4411e-01, PNorm = 65.6399, GNorm = 1.5843, lr_0 = 3.2163e-04
Loss = 5.3065e-01, PNorm = 65.6720, GNorm = 1.0617, lr_0 = 3.2538e-04
Loss = 4.9329e-01, PNorm = 65.7063, GNorm = 1.3604, lr_0 = 3.2912e-04
Loss = 5.7130e-01, PNorm = 65.7375, GNorm = 1.5672, lr_0 = 3.3288e-04
Loss = 5.3684e-01, PNorm = 65.7738, GNorm = 1.4327, lr_0 = 3.3663e-04
Loss = 5.5414e-01, PNorm = 65.8091, GNorm = 1.4687, lr_0 = 3.4038e-04
Loss = 5.1872e-01, PNorm = 65.8468, GNorm = 1.7910, lr_0 = 3.4413e-04
Loss = 5.7567e-01, PNorm = 65.8878, GNorm = 1.6010, lr_0 = 3.4787e-04
Loss = 5.7825e-01, PNorm = 65.9249, GNorm = 1.6201, lr_0 = 3.5162e-04
Loss = 6.1541e-01, PNorm = 65.9650, GNorm = 1.2875, lr_0 = 3.5538e-04
Loss = 5.5519e-01, PNorm = 66.0061, GNorm = 1.4066, lr_0 = 3.5913e-04
Loss = 5.3450e-01, PNorm = 66.0438, GNorm = 1.7876, lr_0 = 3.6288e-04
Loss = 5.1443e-01, PNorm = 66.0811, GNorm = 1.2927, lr_0 = 3.6662e-04
Loss = 5.1830e-01, PNorm = 66.1161, GNorm = 1.9136, lr_0 = 3.7037e-04
Loss = 4.9782e-01, PNorm = 66.1529, GNorm = 1.3444, lr_0 = 3.7413e-04
Loss = 5.0028e-01, PNorm = 66.1938, GNorm = 1.5405, lr_0 = 3.7788e-04
Loss = 5.7168e-01, PNorm = 66.2298, GNorm = 1.6757, lr_0 = 3.8163e-04
Loss = 4.9503e-01, PNorm = 66.2726, GNorm = 1.8821, lr_0 = 3.8537e-04
Loss = 4.8718e-01, PNorm = 66.3120, GNorm = 1.2638, lr_0 = 3.8912e-04
Loss = 4.7332e-01, PNorm = 66.3477, GNorm = 1.3859, lr_0 = 3.9287e-04
Loss = 4.9642e-01, PNorm = 66.3845, GNorm = 1.5520, lr_0 = 3.9663e-04
Loss = 4.7321e-01, PNorm = 66.4203, GNorm = 1.6259, lr_0 = 4.0038e-04
Loss = 5.3472e-01, PNorm = 66.4635, GNorm = 1.2233, lr_0 = 4.0413e-04
Loss = 5.0224e-01, PNorm = 66.5054, GNorm = 1.4281, lr_0 = 4.0787e-04
Loss = 5.0869e-01, PNorm = 66.5470, GNorm = 1.4075, lr_0 = 4.1162e-04
Loss = 4.8766e-01, PNorm = 66.5968, GNorm = 1.4663, lr_0 = 4.1537e-04
Loss = 5.2202e-01, PNorm = 66.6415, GNorm = 1.3341, lr_0 = 4.1913e-04
Loss = 5.0772e-01, PNorm = 66.6872, GNorm = 1.4978, lr_0 = 4.2288e-04
Loss = 4.9269e-01, PNorm = 66.7304, GNorm = 1.3596, lr_0 = 4.2662e-04
Loss = 4.7210e-01, PNorm = 66.7781, GNorm = 1.1719, lr_0 = 4.3037e-04
Loss = 4.8355e-01, PNorm = 66.8287, GNorm = 1.5795, lr_0 = 4.3412e-04
Loss = 4.6656e-01, PNorm = 66.8798, GNorm = 1.3005, lr_0 = 4.3788e-04
Loss = 4.5552e-01, PNorm = 66.9282, GNorm = 2.0450, lr_0 = 4.4163e-04
Loss = 4.8898e-01, PNorm = 66.9747, GNorm = 1.5378, lr_0 = 4.4538e-04
Loss = 4.6701e-01, PNorm = 67.0274, GNorm = 1.2239, lr_0 = 4.4912e-04
Loss = 5.7821e-01, PNorm = 67.0742, GNorm = 0.9960, lr_0 = 4.5287e-04
Loss = 5.3572e-01, PNorm = 67.1279, GNorm = 1.3937, lr_0 = 4.5662e-04
Loss = 4.8122e-01, PNorm = 67.1822, GNorm = 1.4236, lr_0 = 4.6038e-04
Loss = 4.7338e-01, PNorm = 67.2305, GNorm = 1.1598, lr_0 = 4.6413e-04
Loss = 4.9413e-01, PNorm = 67.2788, GNorm = 1.3202, lr_0 = 4.6787e-04
Loss = 5.0012e-01, PNorm = 67.3311, GNorm = 1.2916, lr_0 = 4.7162e-04
Loss = 5.3915e-01, PNorm = 67.3886, GNorm = 1.1458, lr_0 = 4.7537e-04
Loss = 4.1400e-01, PNorm = 67.4435, GNorm = 1.0037, lr_0 = 4.7913e-04
Loss = 5.0075e-01, PNorm = 67.5019, GNorm = 1.5659, lr_0 = 4.8288e-04
Loss = 5.2921e-01, PNorm = 67.5588, GNorm = 1.1951, lr_0 = 4.8663e-04
Loss = 5.0445e-01, PNorm = 67.6192, GNorm = 1.3776, lr_0 = 4.9038e-04
Loss = 4.9364e-01, PNorm = 67.6830, GNorm = 1.1481, lr_0 = 4.9412e-04
Loss = 4.6098e-01, PNorm = 67.7469, GNorm = 1.3419, lr_0 = 4.9788e-04
Loss = 5.4522e-01, PNorm = 67.8122, GNorm = 1.5588, lr_0 = 5.0163e-04
Loss = 4.6539e-01, PNorm = 67.8748, GNorm = 1.0449, lr_0 = 5.0538e-04
Loss = 5.3496e-01, PNorm = 67.9396, GNorm = 1.6330, lr_0 = 5.0913e-04
Loss = 4.6698e-01, PNorm = 67.9965, GNorm = 0.8354, lr_0 = 5.1287e-04
Loss = 5.8237e-01, PNorm = 68.0563, GNorm = 1.4538, lr_0 = 5.1663e-04
Loss = 4.7069e-01, PNorm = 68.1195, GNorm = 1.2993, lr_0 = 5.2038e-04
Loss = 5.6337e-01, PNorm = 68.1739, GNorm = 1.2133, lr_0 = 5.2413e-04
Loss = 4.9673e-01, PNorm = 68.2396, GNorm = 1.2341, lr_0 = 5.2788e-04
Loss = 4.9165e-01, PNorm = 68.3011, GNorm = 1.0947, lr_0 = 5.3162e-04
Loss = 4.5814e-01, PNorm = 68.3527, GNorm = 1.1978, lr_0 = 5.3538e-04
Loss = 4.6960e-01, PNorm = 68.4126, GNorm = 1.2227, lr_0 = 5.3912e-04
Loss = 5.7842e-01, PNorm = 68.4687, GNorm = 1.1850, lr_0 = 5.4288e-04
Loss = 4.9214e-01, PNorm = 68.5391, GNorm = 1.7139, lr_0 = 5.4663e-04
Loss = 4.6325e-01, PNorm = 68.6026, GNorm = 1.6514, lr_0 = 5.5038e-04
Validation mae = 0.129185
Epoch 1
Loss = 4.1058e-01, PNorm = 68.6663, GNorm = 1.1343, lr_0 = 5.5413e-04
Loss = 3.4524e-01, PNorm = 68.7366, GNorm = 1.2369, lr_0 = 5.5787e-04
Loss = 3.6930e-01, PNorm = 68.8038, GNorm = 1.5148, lr_0 = 5.6163e-04
Loss = 3.8077e-01, PNorm = 68.8778, GNorm = 1.4128, lr_0 = 5.6538e-04
Loss = 3.7417e-01, PNorm = 68.9494, GNorm = 1.0556, lr_0 = 5.6913e-04
Loss = 3.7293e-01, PNorm = 69.0307, GNorm = 0.9022, lr_0 = 5.7288e-04
Loss = 3.6523e-01, PNorm = 69.1157, GNorm = 1.3026, lr_0 = 5.7662e-04
Loss = 4.1448e-01, PNorm = 69.2057, GNorm = 1.1736, lr_0 = 5.8038e-04
Loss = 3.6524e-01, PNorm = 69.2938, GNorm = 1.4834, lr_0 = 5.8413e-04
Loss = 4.1126e-01, PNorm = 69.3833, GNorm = 1.2158, lr_0 = 5.8788e-04
Loss = 3.6481e-01, PNorm = 69.4783, GNorm = 1.9590, lr_0 = 5.9163e-04
Loss = 4.1192e-01, PNorm = 69.5718, GNorm = 1.1570, lr_0 = 5.9538e-04
Loss = 3.7942e-01, PNorm = 69.6667, GNorm = 1.2049, lr_0 = 5.9913e-04
Loss = 3.3653e-01, PNorm = 69.7696, GNorm = 1.0568, lr_0 = 6.0288e-04
Loss = 3.2986e-01, PNorm = 69.8558, GNorm = 1.0529, lr_0 = 6.0663e-04
Loss = 3.8835e-01, PNorm = 69.9492, GNorm = 1.1238, lr_0 = 6.1038e-04
Loss = 3.4615e-01, PNorm = 70.0571, GNorm = 1.1880, lr_0 = 6.1413e-04
Loss = 3.5058e-01, PNorm = 70.1575, GNorm = 1.2995, lr_0 = 6.1788e-04
Loss = 4.8342e-01, PNorm = 70.2624, GNorm = 1.6665, lr_0 = 6.2163e-04
Loss = 3.7426e-01, PNorm = 70.3742, GNorm = 2.4193, lr_0 = 6.2538e-04
Loss = 3.8109e-01, PNorm = 70.4835, GNorm = 1.5359, lr_0 = 6.2913e-04
Loss = 4.1660e-01, PNorm = 70.5917, GNorm = 1.8700, lr_0 = 6.3288e-04
Loss = 3.1687e-01, PNorm = 70.6986, GNorm = 1.3108, lr_0 = 6.3663e-04
Loss = 3.8920e-01, PNorm = 70.8045, GNorm = 1.2586, lr_0 = 6.4038e-04
Loss = 3.9618e-01, PNorm = 70.9109, GNorm = 1.5268, lr_0 = 6.4413e-04
Loss = 3.9226e-01, PNorm = 71.0177, GNorm = 1.3088, lr_0 = 6.4788e-04
Loss = 4.1865e-01, PNorm = 71.1376, GNorm = 1.0038, lr_0 = 6.5163e-04
Loss = 4.1455e-01, PNorm = 71.2555, GNorm = 2.0048, lr_0 = 6.5538e-04
Loss = 4.5178e-01, PNorm = 71.3686, GNorm = 1.2815, lr_0 = 6.5913e-04
Loss = 4.4614e-01, PNorm = 71.5058, GNorm = 1.7980, lr_0 = 6.6288e-04
Loss = 3.8328e-01, PNorm = 71.6208, GNorm = 1.1072, lr_0 = 6.6663e-04
Loss = 4.4464e-01, PNorm = 71.7461, GNorm = 1.1801, lr_0 = 6.7038e-04
Loss = 4.0099e-01, PNorm = 71.8623, GNorm = 1.3706, lr_0 = 6.7413e-04
Loss = 4.0977e-01, PNorm = 71.9768, GNorm = 1.3456, lr_0 = 6.7788e-04
Loss = 3.5232e-01, PNorm = 72.0892, GNorm = 0.8987, lr_0 = 6.8163e-04
Loss = 4.6391e-01, PNorm = 72.2010, GNorm = 1.3291, lr_0 = 6.8538e-04
Loss = 4.9166e-01, PNorm = 72.3244, GNorm = 1.3034, lr_0 = 6.8913e-04
Loss = 3.9773e-01, PNorm = 72.4585, GNorm = 1.2383, lr_0 = 6.9288e-04
Loss = 4.5147e-01, PNorm = 72.5849, GNorm = 1.2010, lr_0 = 6.9663e-04
Loss = 4.5166e-01, PNorm = 72.7226, GNorm = 1.4082, lr_0 = 7.0038e-04
Loss = 3.9930e-01, PNorm = 72.8752, GNorm = 1.1014, lr_0 = 7.0413e-04
Loss = 3.7879e-01, PNorm = 73.0008, GNorm = 1.0554, lr_0 = 7.0788e-04
Loss = 4.3368e-01, PNorm = 73.1495, GNorm = 1.0594, lr_0 = 7.1163e-04
Loss = 4.9555e-01, PNorm = 73.2994, GNorm = 1.2573, lr_0 = 7.1538e-04
Loss = 4.3747e-01, PNorm = 73.4526, GNorm = 1.0614, lr_0 = 7.1913e-04
Loss = 4.8760e-01, PNorm = 73.6057, GNorm = 1.8364, lr_0 = 7.2288e-04
Loss = 4.2829e-01, PNorm = 73.7439, GNorm = 1.5088, lr_0 = 7.2663e-04
Loss = 4.3839e-01, PNorm = 73.8911, GNorm = 0.9678, lr_0 = 7.3038e-04
Loss = 4.2982e-01, PNorm = 74.0172, GNorm = 1.0427, lr_0 = 7.3413e-04
Loss = 4.2126e-01, PNorm = 74.1546, GNorm = 1.1196, lr_0 = 7.3788e-04
Loss = 4.5523e-01, PNorm = 74.2823, GNorm = 1.4650, lr_0 = 7.4163e-04
Loss = 3.9904e-01, PNorm = 74.4068, GNorm = 0.8214, lr_0 = 7.4538e-04
Loss = 4.0553e-01, PNorm = 74.5434, GNorm = 1.1805, lr_0 = 7.4913e-04
Loss = 3.6933e-01, PNorm = 74.6645, GNorm = 1.1084, lr_0 = 7.5288e-04
Loss = 4.5416e-01, PNorm = 74.7950, GNorm = 1.2512, lr_0 = 7.5663e-04
Loss = 4.5590e-01, PNorm = 74.9336, GNorm = 1.3160, lr_0 = 7.6038e-04
Loss = 4.1457e-01, PNorm = 75.0748, GNorm = 0.8432, lr_0 = 7.6413e-04
Loss = 3.9989e-01, PNorm = 75.2240, GNorm = 1.4293, lr_0 = 7.6788e-04
Loss = 4.1287e-01, PNorm = 75.3593, GNorm = 1.4189, lr_0 = 7.7163e-04
Loss = 4.0146e-01, PNorm = 75.4974, GNorm = 0.8718, lr_0 = 7.7538e-04
Loss = 4.2072e-01, PNorm = 75.6225, GNorm = 1.2463, lr_0 = 7.7913e-04
Loss = 4.3515e-01, PNorm = 75.7371, GNorm = 1.6395, lr_0 = 7.8288e-04
Loss = 4.6193e-01, PNorm = 75.8718, GNorm = 1.1672, lr_0 = 7.8663e-04
Loss = 3.9103e-01, PNorm = 75.9962, GNorm = 1.0982, lr_0 = 7.9038e-04
Loss = 3.9522e-01, PNorm = 76.1176, GNorm = 1.2050, lr_0 = 7.9413e-04
Loss = 4.1848e-01, PNorm = 76.2514, GNorm = 1.2513, lr_0 = 7.9788e-04
Loss = 4.0710e-01, PNorm = 76.3835, GNorm = 1.0473, lr_0 = 8.0163e-04
Loss = 4.1355e-01, PNorm = 76.5191, GNorm = 1.4284, lr_0 = 8.0538e-04
Loss = 4.0025e-01, PNorm = 76.6509, GNorm = 0.8491, lr_0 = 8.0913e-04
Loss = 4.7509e-01, PNorm = 76.7889, GNorm = 1.0222, lr_0 = 8.1288e-04
Loss = 4.1256e-01, PNorm = 76.9272, GNorm = 1.4474, lr_0 = 8.1663e-04
Loss = 4.0208e-01, PNorm = 77.0662, GNorm = 1.1609, lr_0 = 8.2038e-04
Loss = 4.5921e-01, PNorm = 77.2144, GNorm = 1.1392, lr_0 = 8.2413e-04
Loss = 4.2756e-01, PNorm = 77.3681, GNorm = 1.1361, lr_0 = 8.2788e-04
Loss = 4.3745e-01, PNorm = 77.5143, GNorm = 1.1980, lr_0 = 8.3163e-04
Loss = 4.0188e-01, PNorm = 77.6560, GNorm = 1.2637, lr_0 = 8.3538e-04
Loss = 4.4154e-01, PNorm = 77.8046, GNorm = 1.1453, lr_0 = 8.3913e-04
Loss = 3.7434e-01, PNorm = 77.9508, GNorm = 1.1577, lr_0 = 8.4288e-04
Loss = 3.8330e-01, PNorm = 78.0904, GNorm = 0.9048, lr_0 = 8.4663e-04
Loss = 4.7460e-01, PNorm = 78.2425, GNorm = 1.2087, lr_0 = 8.5038e-04
Loss = 4.1821e-01, PNorm = 78.3967, GNorm = 1.1204, lr_0 = 8.5413e-04
Loss = 4.4262e-01, PNorm = 78.5506, GNorm = 0.9064, lr_0 = 8.5788e-04
Loss = 4.2150e-01, PNorm = 78.7087, GNorm = 1.2786, lr_0 = 8.6163e-04
Loss = 4.6456e-01, PNorm = 78.8635, GNorm = 0.9048, lr_0 = 8.6538e-04
Loss = 4.6131e-01, PNorm = 79.0198, GNorm = 1.1279, lr_0 = 8.6913e-04
Loss = 5.0364e-01, PNorm = 79.1733, GNorm = 0.9679, lr_0 = 8.7288e-04
Loss = 4.5201e-01, PNorm = 79.3331, GNorm = 1.8842, lr_0 = 8.7663e-04
Loss = 4.1962e-01, PNorm = 79.4893, GNorm = 0.8571, lr_0 = 8.8038e-04
Loss = 4.8548e-01, PNorm = 79.6429, GNorm = 1.8333, lr_0 = 8.8413e-04
Loss = 4.1668e-01, PNorm = 79.7715, GNorm = 0.9897, lr_0 = 8.8788e-04
Loss = 3.8828e-01, PNorm = 79.9127, GNorm = 1.5232, lr_0 = 8.9163e-04
Loss = 4.1830e-01, PNorm = 80.0478, GNorm = 1.1105, lr_0 = 8.9538e-04
Loss = 4.2202e-01, PNorm = 80.1902, GNorm = 1.2951, lr_0 = 8.9913e-04
Loss = 4.6890e-01, PNorm = 80.3270, GNorm = 0.7193, lr_0 = 9.0288e-04
Loss = 4.6023e-01, PNorm = 80.4736, GNorm = 1.5546, lr_0 = 9.0663e-04
Loss = 3.8206e-01, PNorm = 80.5997, GNorm = 1.2578, lr_0 = 9.1038e-04
Loss = 4.2800e-01, PNorm = 80.7308, GNorm = 1.0118, lr_0 = 9.1413e-04
Loss = 4.0021e-01, PNorm = 80.8531, GNorm = 0.8396, lr_0 = 9.1788e-04
Loss = 4.6267e-01, PNorm = 81.0004, GNorm = 1.2598, lr_0 = 9.2163e-04
Loss = 4.2134e-01, PNorm = 81.1276, GNorm = 0.7767, lr_0 = 9.2538e-04
Loss = 4.2627e-01, PNorm = 81.2759, GNorm = 0.9542, lr_0 = 9.2913e-04
Loss = 5.1657e-01, PNorm = 81.4148, GNorm = 1.6318, lr_0 = 9.3288e-04
Loss = 3.7610e-01, PNorm = 81.5739, GNorm = 1.0768, lr_0 = 9.3663e-04
Loss = 3.9311e-01, PNorm = 81.7287, GNorm = 1.0025, lr_0 = 9.4038e-04
Loss = 4.6472e-01, PNorm = 81.8819, GNorm = 0.9160, lr_0 = 9.4413e-04
Loss = 4.5997e-01, PNorm = 82.0350, GNorm = 1.4395, lr_0 = 9.4788e-04
Loss = 4.1125e-01, PNorm = 82.1964, GNorm = 1.0640, lr_0 = 9.5163e-04
Loss = 4.3594e-01, PNorm = 82.3564, GNorm = 1.4774, lr_0 = 9.5538e-04
Loss = 4.4085e-01, PNorm = 82.5047, GNorm = 1.9646, lr_0 = 9.5913e-04
Loss = 4.3709e-01, PNorm = 82.6689, GNorm = 0.9876, lr_0 = 9.6288e-04
Loss = 4.6166e-01, PNorm = 82.8296, GNorm = 1.2482, lr_0 = 9.6663e-04
Loss = 4.4234e-01, PNorm = 82.9883, GNorm = 0.9824, lr_0 = 9.7038e-04
Loss = 4.2509e-01, PNorm = 83.1563, GNorm = 1.0709, lr_0 = 9.7413e-04
Loss = 5.0213e-01, PNorm = 83.3186, GNorm = 1.7778, lr_0 = 9.7788e-04
Loss = 4.5404e-01, PNorm = 83.4777, GNorm = 1.8115, lr_0 = 9.8163e-04
Loss = 4.2547e-01, PNorm = 83.6495, GNorm = 1.0484, lr_0 = 9.8537e-04
Loss = 5.3327e-01, PNorm = 83.8000, GNorm = 1.2129, lr_0 = 9.8912e-04
Loss = 4.3358e-01, PNorm = 83.9488, GNorm = 1.3781, lr_0 = 9.9288e-04
Loss = 4.1856e-01, PNorm = 84.0937, GNorm = 1.2667, lr_0 = 9.9663e-04
Loss = 4.4636e-01, PNorm = 84.2413, GNorm = 1.3154, lr_0 = 9.9993e-04
Validation mae = 0.127658
Epoch 2
Loss = 3.2320e-01, PNorm = 84.4059, GNorm = 0.9603, lr_0 = 9.9925e-04
Loss = 2.9957e-01, PNorm = 84.5507, GNorm = 1.0787, lr_0 = 9.9856e-04
Loss = 2.9570e-01, PNorm = 84.7240, GNorm = 0.9704, lr_0 = 9.9788e-04
Loss = 2.4145e-01, PNorm = 84.8650, GNorm = 0.7120, lr_0 = 9.9719e-04
Loss = 2.8534e-01, PNorm = 85.0150, GNorm = 1.7671, lr_0 = 9.9651e-04
Loss = 2.5837e-01, PNorm = 85.1615, GNorm = 1.4505, lr_0 = 9.9583e-04
Loss = 2.7092e-01, PNorm = 85.3118, GNorm = 1.6126, lr_0 = 9.9515e-04
Loss = 2.9018e-01, PNorm = 85.4792, GNorm = 1.2142, lr_0 = 9.9446e-04
Loss = 3.1328e-01, PNorm = 85.6381, GNorm = 0.7907, lr_0 = 9.9378e-04
Loss = 2.9505e-01, PNorm = 85.8153, GNorm = 1.3410, lr_0 = 9.9310e-04
Loss = 2.9012e-01, PNorm = 85.9807, GNorm = 1.2949, lr_0 = 9.9242e-04
Loss = 2.8723e-01, PNorm = 86.1464, GNorm = 0.8903, lr_0 = 9.9174e-04
Loss = 3.7284e-01, PNorm = 86.3303, GNorm = 0.8807, lr_0 = 9.9106e-04
Loss = 3.3946e-01, PNorm = 86.5217, GNorm = 0.9487, lr_0 = 9.9038e-04
Loss = 2.6413e-01, PNorm = 86.7238, GNorm = 1.2768, lr_0 = 9.8971e-04
Loss = 2.7930e-01, PNorm = 86.8896, GNorm = 1.2938, lr_0 = 9.8903e-04
Loss = 2.9558e-01, PNorm = 87.0751, GNorm = 0.9423, lr_0 = 9.8835e-04
Loss = 2.7738e-01, PNorm = 87.2574, GNorm = 1.0263, lr_0 = 9.8767e-04
Loss = 2.8804e-01, PNorm = 87.4358, GNorm = 0.9156, lr_0 = 9.8700e-04
Loss = 3.2428e-01, PNorm = 87.6162, GNorm = 1.3401, lr_0 = 9.8632e-04
Loss = 2.7622e-01, PNorm = 87.8092, GNorm = 0.8071, lr_0 = 9.8564e-04
Loss = 2.8430e-01, PNorm = 87.9988, GNorm = 0.7300, lr_0 = 9.8497e-04
Loss = 3.0014e-01, PNorm = 88.2163, GNorm = 1.0576, lr_0 = 9.8429e-04
Loss = 2.5311e-01, PNorm = 88.3877, GNorm = 1.1265, lr_0 = 9.8362e-04
Loss = 2.6532e-01, PNorm = 88.5594, GNorm = 0.8491, lr_0 = 9.8295e-04
Loss = 3.0038e-01, PNorm = 88.7301, GNorm = 1.0235, lr_0 = 9.8227e-04
Loss = 2.7359e-01, PNorm = 88.8961, GNorm = 0.8900, lr_0 = 9.8160e-04
Loss = 2.8477e-01, PNorm = 89.0698, GNorm = 1.4520, lr_0 = 9.8093e-04
Loss = 3.0544e-01, PNorm = 89.2260, GNorm = 0.7989, lr_0 = 9.8026e-04
Loss = 2.8843e-01, PNorm = 89.4086, GNorm = 1.4220, lr_0 = 9.7958e-04
Loss = 3.3606e-01, PNorm = 89.5785, GNorm = 0.9856, lr_0 = 9.7891e-04
Loss = 2.8736e-01, PNorm = 89.7610, GNorm = 0.9488, lr_0 = 9.7824e-04
Loss = 2.9545e-01, PNorm = 89.9341, GNorm = 0.9770, lr_0 = 9.7757e-04
Loss = 2.8113e-01, PNorm = 90.0975, GNorm = 0.9352, lr_0 = 9.7690e-04
Loss = 3.3368e-01, PNorm = 90.2810, GNorm = 0.7605, lr_0 = 9.7623e-04
Loss = 2.6590e-01, PNorm = 90.4536, GNorm = 0.9616, lr_0 = 9.7556e-04
Loss = 2.6575e-01, PNorm = 90.6095, GNorm = 0.9939, lr_0 = 9.7490e-04
Loss = 3.1555e-01, PNorm = 90.7693, GNorm = 0.8800, lr_0 = 9.7423e-04
Loss = 3.0847e-01, PNorm = 90.9313, GNorm = 0.8904, lr_0 = 9.7356e-04
Loss = 3.2678e-01, PNorm = 91.0931, GNorm = 1.3985, lr_0 = 9.7289e-04
Loss = 3.5503e-01, PNorm = 91.2624, GNorm = 1.2040, lr_0 = 9.7223e-04
Loss = 3.2363e-01, PNorm = 91.4439, GNorm = 1.1389, lr_0 = 9.7156e-04
Loss = 3.3876e-01, PNorm = 91.6276, GNorm = 0.9642, lr_0 = 9.7090e-04
Loss = 3.5393e-01, PNorm = 91.8205, GNorm = 1.4552, lr_0 = 9.7023e-04
Loss = 3.1655e-01, PNorm = 92.0035, GNorm = 0.7650, lr_0 = 9.6957e-04
Loss = 2.7610e-01, PNorm = 92.1773, GNorm = 0.9150, lr_0 = 9.6890e-04
Loss = 2.9283e-01, PNorm = 92.3421, GNorm = 0.9155, lr_0 = 9.6824e-04
Loss = 2.7661e-01, PNorm = 92.5023, GNorm = 0.9365, lr_0 = 9.6757e-04
Loss = 3.3282e-01, PNorm = 92.6713, GNorm = 1.3074, lr_0 = 9.6691e-04
Loss = 2.6550e-01, PNorm = 92.8287, GNorm = 1.0223, lr_0 = 9.6625e-04
Loss = 2.7098e-01, PNorm = 92.9942, GNorm = 0.9411, lr_0 = 9.6559e-04
Loss = 3.1136e-01, PNorm = 93.1576, GNorm = 0.9514, lr_0 = 9.6493e-04
Loss = 3.0002e-01, PNorm = 93.3159, GNorm = 0.7605, lr_0 = 9.6427e-04
Loss = 3.7868e-01, PNorm = 93.4658, GNorm = 0.9125, lr_0 = 9.6360e-04
Loss = 3.0064e-01, PNorm = 93.6281, GNorm = 1.2549, lr_0 = 9.6294e-04
Loss = 3.0024e-01, PNorm = 93.7940, GNorm = 1.2681, lr_0 = 9.6228e-04
Loss = 3.3840e-01, PNorm = 93.9514, GNorm = 1.1443, lr_0 = 9.6163e-04
Loss = 3.1514e-01, PNorm = 94.1302, GNorm = 1.0291, lr_0 = 9.6097e-04
Loss = 3.5624e-01, PNorm = 94.2910, GNorm = 2.0991, lr_0 = 9.6031e-04
Loss = 3.2562e-01, PNorm = 94.4515, GNorm = 0.9040, lr_0 = 9.5965e-04
Loss = 3.3678e-01, PNorm = 94.6056, GNorm = 1.6687, lr_0 = 9.5899e-04
Loss = 3.1442e-01, PNorm = 94.7831, GNorm = 1.4872, lr_0 = 9.5834e-04
Loss = 2.8938e-01, PNorm = 94.9284, GNorm = 1.0157, lr_0 = 9.5768e-04
Loss = 3.9244e-01, PNorm = 95.0935, GNorm = 1.7919, lr_0 = 9.5702e-04
Loss = 4.0320e-01, PNorm = 95.2689, GNorm = 1.4656, lr_0 = 9.5637e-04
Loss = 3.1868e-01, PNorm = 95.4430, GNorm = 1.8656, lr_0 = 9.5571e-04
Loss = 3.7076e-01, PNorm = 95.6052, GNorm = 0.9664, lr_0 = 9.5506e-04
Loss = 2.8234e-01, PNorm = 95.7770, GNorm = 1.3657, lr_0 = 9.5440e-04
Loss = 3.4293e-01, PNorm = 95.9269, GNorm = 0.9923, lr_0 = 9.5375e-04
Loss = 3.2364e-01, PNorm = 96.1006, GNorm = 1.1029, lr_0 = 9.5310e-04
Loss = 2.9760e-01, PNorm = 96.2539, GNorm = 1.2555, lr_0 = 9.5244e-04
Loss = 2.9217e-01, PNorm = 96.3987, GNorm = 0.9379, lr_0 = 9.5179e-04
Loss = 2.7489e-01, PNorm = 96.5413, GNorm = 0.9067, lr_0 = 9.5114e-04
Loss = 3.2270e-01, PNorm = 96.6653, GNorm = 1.1099, lr_0 = 9.5049e-04
Loss = 3.7792e-01, PNorm = 96.8211, GNorm = 1.2286, lr_0 = 9.4984e-04
Loss = 3.3026e-01, PNorm = 96.9821, GNorm = 0.7975, lr_0 = 9.4919e-04
Loss = 3.3208e-01, PNorm = 97.1506, GNorm = 0.9823, lr_0 = 9.4854e-04
Loss = 2.7296e-01, PNorm = 97.3059, GNorm = 1.3601, lr_0 = 9.4789e-04
Loss = 3.4035e-01, PNorm = 97.4612, GNorm = 1.4159, lr_0 = 9.4724e-04
Loss = 3.4756e-01, PNorm = 97.6268, GNorm = 0.9232, lr_0 = 9.4659e-04
Loss = 3.2350e-01, PNorm = 97.8011, GNorm = 0.7997, lr_0 = 9.4594e-04
Loss = 3.0602e-01, PNorm = 97.9708, GNorm = 1.6479, lr_0 = 9.4529e-04
Loss = 3.0591e-01, PNorm = 98.1207, GNorm = 1.0338, lr_0 = 9.4464e-04
Loss = 3.0768e-01, PNorm = 98.2622, GNorm = 1.1787, lr_0 = 9.4400e-04
Loss = 3.2196e-01, PNorm = 98.3925, GNorm = 0.8695, lr_0 = 9.4335e-04
Loss = 3.7302e-01, PNorm = 98.5341, GNorm = 0.9371, lr_0 = 9.4270e-04
Loss = 3.1304e-01, PNorm = 98.6747, GNorm = 0.9229, lr_0 = 9.4206e-04
Loss = 3.7549e-01, PNorm = 98.8205, GNorm = 1.0911, lr_0 = 9.4141e-04
Loss = 2.9493e-01, PNorm = 98.9711, GNorm = 1.2813, lr_0 = 9.4077e-04
Loss = 3.6346e-01, PNorm = 99.1146, GNorm = 1.0503, lr_0 = 9.4012e-04
Loss = 3.1462e-01, PNorm = 99.2822, GNorm = 1.2902, lr_0 = 9.3948e-04
Loss = 3.3667e-01, PNorm = 99.4379, GNorm = 1.4373, lr_0 = 9.3884e-04
Loss = 3.5500e-01, PNorm = 99.5809, GNorm = 1.0791, lr_0 = 9.3819e-04
Loss = 3.0732e-01, PNorm = 99.7427, GNorm = 0.9197, lr_0 = 9.3755e-04
Loss = 3.4948e-01, PNorm = 99.8915, GNorm = 0.9747, lr_0 = 9.3691e-04
Loss = 3.9869e-01, PNorm = 100.0307, GNorm = 1.0483, lr_0 = 9.3627e-04
Loss = 3.1249e-01, PNorm = 100.1737, GNorm = 0.7600, lr_0 = 9.3562e-04
Loss = 3.2901e-01, PNorm = 100.3044, GNorm = 0.8819, lr_0 = 9.3498e-04
Loss = 3.2869e-01, PNorm = 100.4553, GNorm = 1.1567, lr_0 = 9.3434e-04
Loss = 3.0733e-01, PNorm = 100.5960, GNorm = 0.9621, lr_0 = 9.3370e-04
Loss = 3.2665e-01, PNorm = 100.7449, GNorm = 0.8822, lr_0 = 9.3306e-04
Loss = 3.0693e-01, PNorm = 100.8918, GNorm = 1.0961, lr_0 = 9.3242e-04
Loss = 3.4135e-01, PNorm = 101.0500, GNorm = 0.9300, lr_0 = 9.3178e-04
Loss = 3.1458e-01, PNorm = 101.1961, GNorm = 1.3328, lr_0 = 9.3115e-04
Loss = 2.8835e-01, PNorm = 101.3494, GNorm = 1.7431, lr_0 = 9.3051e-04
Loss = 3.0774e-01, PNorm = 101.5059, GNorm = 0.9182, lr_0 = 9.2987e-04
Loss = 3.5919e-01, PNorm = 101.6583, GNorm = 0.7333, lr_0 = 9.2923e-04
Loss = 3.3290e-01, PNorm = 101.8272, GNorm = 0.8161, lr_0 = 9.2860e-04
Loss = 3.8853e-01, PNorm = 101.9952, GNorm = 1.2118, lr_0 = 9.2796e-04
Loss = 2.7722e-01, PNorm = 102.1593, GNorm = 0.8493, lr_0 = 9.2733e-04
Loss = 3.1266e-01, PNorm = 102.3241, GNorm = 0.7881, lr_0 = 9.2669e-04
Loss = 3.2897e-01, PNorm = 102.4769, GNorm = 0.9680, lr_0 = 9.2606e-04
Loss = 3.4569e-01, PNorm = 102.6308, GNorm = 1.5581, lr_0 = 9.2542e-04
Loss = 3.1516e-01, PNorm = 102.7779, GNorm = 0.8458, lr_0 = 9.2479e-04
Loss = 3.0054e-01, PNorm = 102.9246, GNorm = 0.8737, lr_0 = 9.2415e-04
Loss = 3.6358e-01, PNorm = 103.0834, GNorm = 1.0098, lr_0 = 9.2352e-04
Loss = 3.0408e-01, PNorm = 103.2425, GNorm = 0.9932, lr_0 = 9.2289e-04
Loss = 2.8534e-01, PNorm = 103.3921, GNorm = 0.6983, lr_0 = 9.2226e-04
Loss = 3.3440e-01, PNorm = 103.5366, GNorm = 1.1870, lr_0 = 9.2162e-04
Loss = 3.3676e-01, PNorm = 103.6835, GNorm = 1.0367, lr_0 = 9.2099e-04
Validation mae = 0.127023
Epoch 3
Loss = 1.8394e-01, PNorm = 103.8201, GNorm = 0.9686, lr_0 = 9.2036e-04
Loss = 1.9205e-01, PNorm = 103.9402, GNorm = 1.3657, lr_0 = 9.1973e-04
Loss = 1.8157e-01, PNorm = 104.0439, GNorm = 0.6517, lr_0 = 9.1910e-04
Loss = 1.6536e-01, PNorm = 104.1454, GNorm = 0.6209, lr_0 = 9.1847e-04
Loss = 2.0253e-01, PNorm = 104.2481, GNorm = 1.4589, lr_0 = 9.1784e-04
Loss = 2.0068e-01, PNorm = 104.3614, GNorm = 1.2256, lr_0 = 9.1721e-04
Loss = 1.8261e-01, PNorm = 104.4705, GNorm = 0.8071, lr_0 = 9.1658e-04
Loss = 1.5407e-01, PNorm = 104.5863, GNorm = 0.7372, lr_0 = 9.1596e-04
Loss = 1.9287e-01, PNorm = 104.6921, GNorm = 0.7595, lr_0 = 9.1533e-04
Loss = 2.2022e-01, PNorm = 104.8062, GNorm = 0.7832, lr_0 = 9.1470e-04
Loss = 2.0577e-01, PNorm = 104.9235, GNorm = 1.2597, lr_0 = 9.1408e-04
Loss = 1.6874e-01, PNorm = 105.0350, GNorm = 0.7030, lr_0 = 9.1345e-04
Loss = 2.1584e-01, PNorm = 105.1468, GNorm = 0.7197, lr_0 = 9.1282e-04
Loss = 1.9051e-01, PNorm = 105.2605, GNorm = 0.8155, lr_0 = 9.1220e-04
Loss = 1.6406e-01, PNorm = 105.3799, GNorm = 1.0109, lr_0 = 9.1157e-04
Loss = 1.8780e-01, PNorm = 105.4947, GNorm = 1.1086, lr_0 = 9.1095e-04
Loss = 1.8906e-01, PNorm = 105.6233, GNorm = 0.9367, lr_0 = 9.1032e-04
Loss = 1.6984e-01, PNorm = 105.7469, GNorm = 1.3259, lr_0 = 9.0970e-04
Loss = 1.7297e-01, PNorm = 105.8642, GNorm = 0.8047, lr_0 = 9.0908e-04
Loss = 1.8828e-01, PNorm = 105.9789, GNorm = 0.7588, lr_0 = 9.0846e-04
Loss = 2.0242e-01, PNorm = 106.0914, GNorm = 0.7314, lr_0 = 9.0783e-04
Loss = 1.7042e-01, PNorm = 106.2027, GNorm = 0.8863, lr_0 = 9.0721e-04
Loss = 1.8844e-01, PNorm = 106.3049, GNorm = 0.6059, lr_0 = 9.0659e-04
Loss = 1.8582e-01, PNorm = 106.4242, GNorm = 0.6386, lr_0 = 9.0597e-04
Loss = 1.4858e-01, PNorm = 106.5388, GNorm = 0.8361, lr_0 = 9.0535e-04
Loss = 1.9061e-01, PNorm = 106.6489, GNorm = 0.7597, lr_0 = 9.0473e-04
Loss = 1.9566e-01, PNorm = 106.7599, GNorm = 0.8235, lr_0 = 9.0411e-04
Loss = 2.0097e-01, PNorm = 106.8773, GNorm = 0.9002, lr_0 = 9.0349e-04
Loss = 1.7863e-01, PNorm = 106.9960, GNorm = 0.9906, lr_0 = 9.0287e-04
Loss = 1.7200e-01, PNorm = 107.1049, GNorm = 0.7342, lr_0 = 9.0225e-04
Loss = 1.8182e-01, PNorm = 107.2072, GNorm = 0.7652, lr_0 = 9.0163e-04
Loss = 2.0053e-01, PNorm = 107.3328, GNorm = 1.2229, lr_0 = 9.0102e-04
Loss = 1.9523e-01, PNorm = 107.4485, GNorm = 0.7983, lr_0 = 9.0040e-04
Loss = 1.7452e-01, PNorm = 107.5746, GNorm = 0.9288, lr_0 = 8.9978e-04
Loss = 1.7533e-01, PNorm = 107.7023, GNorm = 1.7060, lr_0 = 8.9916e-04
Loss = 2.0336e-01, PNorm = 107.8301, GNorm = 0.7553, lr_0 = 8.9855e-04
Loss = 1.8684e-01, PNorm = 107.9595, GNorm = 0.8734, lr_0 = 8.9793e-04
Loss = 2.4797e-01, PNorm = 108.0952, GNorm = 1.0555, lr_0 = 8.9732e-04
Loss = 2.0789e-01, PNorm = 108.2202, GNorm = 0.7370, lr_0 = 8.9670e-04
Loss = 1.7247e-01, PNorm = 108.3535, GNorm = 0.6898, lr_0 = 8.9609e-04
Loss = 1.9782e-01, PNorm = 108.4849, GNorm = 0.9254, lr_0 = 8.9548e-04
Loss = 1.8890e-01, PNorm = 108.5911, GNorm = 1.0619, lr_0 = 8.9486e-04
Loss = 1.9669e-01, PNorm = 108.6942, GNorm = 0.4220, lr_0 = 8.9425e-04
Loss = 2.1257e-01, PNorm = 108.8075, GNorm = 0.9981, lr_0 = 8.9364e-04
Loss = 1.9532e-01, PNorm = 108.9254, GNorm = 0.6682, lr_0 = 8.9302e-04
Loss = 2.0748e-01, PNorm = 109.0390, GNorm = 0.9871, lr_0 = 8.9241e-04
Loss = 1.9448e-01, PNorm = 109.1556, GNorm = 0.9137, lr_0 = 8.9180e-04
Loss = 2.3720e-01, PNorm = 109.2833, GNorm = 0.7209, lr_0 = 8.9119e-04
Loss = 1.9005e-01, PNorm = 109.4106, GNorm = 0.8365, lr_0 = 8.9058e-04
Loss = 2.1100e-01, PNorm = 109.5437, GNorm = 0.9986, lr_0 = 8.8997e-04
Loss = 1.8348e-01, PNorm = 109.6591, GNorm = 1.2226, lr_0 = 8.8936e-04
Loss = 1.9186e-01, PNorm = 109.7883, GNorm = 0.8843, lr_0 = 8.8875e-04
Loss = 2.1819e-01, PNorm = 109.9080, GNorm = 1.0245, lr_0 = 8.8814e-04
Loss = 1.9313e-01, PNorm = 110.0415, GNorm = 0.7880, lr_0 = 8.8753e-04
Loss = 1.7584e-01, PNorm = 110.1588, GNorm = 0.9776, lr_0 = 8.8693e-04
Loss = 1.7503e-01, PNorm = 110.2690, GNorm = 0.9706, lr_0 = 8.8632e-04
Loss = 1.8954e-01, PNorm = 110.3858, GNorm = 1.0237, lr_0 = 8.8571e-04
Loss = 1.8581e-01, PNorm = 110.5019, GNorm = 0.7971, lr_0 = 8.8510e-04
Loss = 1.9121e-01, PNorm = 110.6214, GNorm = 0.9700, lr_0 = 8.8450e-04
Loss = 1.9223e-01, PNorm = 110.7268, GNorm = 0.8330, lr_0 = 8.8389e-04
Loss = 2.0356e-01, PNorm = 110.8457, GNorm = 0.7892, lr_0 = 8.8329e-04
Loss = 2.1410e-01, PNorm = 110.9789, GNorm = 1.4496, lr_0 = 8.8268e-04
Loss = 1.9295e-01, PNorm = 111.1010, GNorm = 0.9307, lr_0 = 8.8208e-04
Loss = 1.9455e-01, PNorm = 111.2233, GNorm = 0.5371, lr_0 = 8.8147e-04
Loss = 1.9227e-01, PNorm = 111.3468, GNorm = 1.0755, lr_0 = 8.8087e-04
Loss = 2.1562e-01, PNorm = 111.4736, GNorm = 1.2502, lr_0 = 8.8026e-04
Loss = 2.0895e-01, PNorm = 111.6117, GNorm = 1.0218, lr_0 = 8.7966e-04
Loss = 2.2561e-01, PNorm = 111.7439, GNorm = 0.6582, lr_0 = 8.7906e-04
Loss = 1.9089e-01, PNorm = 111.8789, GNorm = 0.8753, lr_0 = 8.7846e-04
Loss = 2.1000e-01, PNorm = 111.9977, GNorm = 0.6783, lr_0 = 8.7785e-04
Loss = 1.9223e-01, PNorm = 112.1142, GNorm = 1.0918, lr_0 = 8.7725e-04
Loss = 1.8994e-01, PNorm = 112.2289, GNorm = 0.8469, lr_0 = 8.7665e-04
Loss = 1.8529e-01, PNorm = 112.3407, GNorm = 0.9068, lr_0 = 8.7605e-04
Loss = 2.0264e-01, PNorm = 112.4571, GNorm = 0.7460, lr_0 = 8.7545e-04
Loss = 2.4709e-01, PNorm = 112.5837, GNorm = 1.6126, lr_0 = 8.7485e-04
Loss = 2.1930e-01, PNorm = 112.7023, GNorm = 0.9037, lr_0 = 8.7425e-04
Loss = 2.0205e-01, PNorm = 112.8299, GNorm = 1.3467, lr_0 = 8.7365e-04
Loss = 1.9597e-01, PNorm = 112.9526, GNorm = 0.7229, lr_0 = 8.7306e-04
Loss = 2.0537e-01, PNorm = 113.0754, GNorm = 0.6344, lr_0 = 8.7246e-04
Loss = 2.1292e-01, PNorm = 113.2045, GNorm = 1.0892, lr_0 = 8.7186e-04
Loss = 1.9790e-01, PNorm = 113.3183, GNorm = 0.6439, lr_0 = 8.7126e-04
Loss = 2.0337e-01, PNorm = 113.4356, GNorm = 0.8905, lr_0 = 8.7067e-04
Loss = 1.9352e-01, PNorm = 113.5503, GNorm = 0.8486, lr_0 = 8.7007e-04
Loss = 2.2198e-01, PNorm = 113.6799, GNorm = 1.2193, lr_0 = 8.6947e-04
Loss = 2.3580e-01, PNorm = 113.8084, GNorm = 1.1986, lr_0 = 8.6888e-04
Loss = 2.0642e-01, PNorm = 113.9457, GNorm = 1.1807, lr_0 = 8.6828e-04
Loss = 1.9937e-01, PNorm = 114.0780, GNorm = 0.8415, lr_0 = 8.6769e-04
Loss = 2.3676e-01, PNorm = 114.2158, GNorm = 1.1870, lr_0 = 8.6709e-04
Loss = 2.1024e-01, PNorm = 114.3336, GNorm = 1.0715, lr_0 = 8.6650e-04
Loss = 2.0204e-01, PNorm = 114.4597, GNorm = 0.7088, lr_0 = 8.6590e-04
Loss = 2.2566e-01, PNorm = 114.5860, GNorm = 2.0218, lr_0 = 8.6531e-04
Loss = 2.0708e-01, PNorm = 114.7082, GNorm = 0.8094, lr_0 = 8.6472e-04
Loss = 2.1989e-01, PNorm = 114.8412, GNorm = 1.3401, lr_0 = 8.6413e-04
Loss = 2.0599e-01, PNorm = 114.9630, GNorm = 1.4830, lr_0 = 8.6353e-04
Loss = 2.7307e-01, PNorm = 115.0908, GNorm = 1.3513, lr_0 = 8.6294e-04
Loss = 2.3981e-01, PNorm = 115.2325, GNorm = 0.8757, lr_0 = 8.6235e-04
Loss = 2.5017e-01, PNorm = 115.3719, GNorm = 1.4212, lr_0 = 8.6176e-04
Loss = 2.0410e-01, PNorm = 115.5005, GNorm = 0.6182, lr_0 = 8.6117e-04
Loss = 2.0998e-01, PNorm = 115.6213, GNorm = 0.9117, lr_0 = 8.6058e-04
Loss = 2.1425e-01, PNorm = 115.7288, GNorm = 0.8213, lr_0 = 8.5999e-04
Loss = 2.5151e-01, PNorm = 115.8536, GNorm = 1.3918, lr_0 = 8.5940e-04
Loss = 2.2102e-01, PNorm = 115.9713, GNorm = 1.0528, lr_0 = 8.5881e-04
Loss = 2.4613e-01, PNorm = 116.1028, GNorm = 0.8931, lr_0 = 8.5823e-04
Loss = 2.4023e-01, PNorm = 116.2478, GNorm = 0.7375, lr_0 = 8.5764e-04
Loss = 2.1933e-01, PNorm = 116.3803, GNorm = 0.9590, lr_0 = 8.5705e-04
Loss = 2.3257e-01, PNorm = 116.5073, GNorm = 0.8890, lr_0 = 8.5646e-04
Loss = 2.0316e-01, PNorm = 116.6326, GNorm = 0.9808, lr_0 = 8.5588e-04
Loss = 2.2260e-01, PNorm = 116.7459, GNorm = 0.5563, lr_0 = 8.5529e-04
Loss = 2.4023e-01, PNorm = 116.8670, GNorm = 1.1808, lr_0 = 8.5470e-04
Loss = 2.2360e-01, PNorm = 116.9837, GNorm = 0.8624, lr_0 = 8.5412e-04
Loss = 2.0272e-01, PNorm = 117.1070, GNorm = 0.8552, lr_0 = 8.5353e-04
Loss = 2.4947e-01, PNorm = 117.2283, GNorm = 0.9491, lr_0 = 8.5295e-04
Loss = 2.4420e-01, PNorm = 117.3566, GNorm = 1.0585, lr_0 = 8.5236e-04
Loss = 2.2347e-01, PNorm = 117.4836, GNorm = 0.8364, lr_0 = 8.5178e-04
Loss = 2.3831e-01, PNorm = 117.6203, GNorm = 1.1518, lr_0 = 8.5120e-04
Loss = 2.3721e-01, PNorm = 117.7413, GNorm = 0.7759, lr_0 = 8.5061e-04
Loss = 2.0895e-01, PNorm = 117.8691, GNorm = 0.8122, lr_0 = 8.5003e-04
Loss = 2.0263e-01, PNorm = 117.9908, GNorm = 0.8292, lr_0 = 8.4945e-04
Loss = 2.1896e-01, PNorm = 118.1201, GNorm = 0.8219, lr_0 = 8.4887e-04
Loss = 2.1249e-01, PNorm = 118.2380, GNorm = 1.0292, lr_0 = 8.4828e-04
Validation mae = 0.126322
Epoch 4
Loss = 1.3467e-01, PNorm = 118.3485, GNorm = 0.7445, lr_0 = 8.4770e-04
Loss = 1.0812e-01, PNorm = 118.4451, GNorm = 0.7716, lr_0 = 8.4712e-04
Loss = 1.2808e-01, PNorm = 118.5288, GNorm = 0.5926, lr_0 = 8.4654e-04
Loss = 1.2170e-01, PNorm = 118.6112, GNorm = 1.1126, lr_0 = 8.4596e-04
Loss = 1.2568e-01, PNorm = 118.7043, GNorm = 0.6724, lr_0 = 8.4538e-04
Loss = 1.3727e-01, PNorm = 118.7830, GNorm = 0.8479, lr_0 = 8.4480e-04
Loss = 1.2578e-01, PNorm = 118.8763, GNorm = 0.6190, lr_0 = 8.4423e-04
Loss = 1.0204e-01, PNorm = 118.9557, GNorm = 0.5115, lr_0 = 8.4365e-04
Loss = 1.1512e-01, PNorm = 119.0411, GNorm = 0.8520, lr_0 = 8.4307e-04
Loss = 1.0986e-01, PNorm = 119.1185, GNorm = 0.8863, lr_0 = 8.4249e-04
Loss = 1.1744e-01, PNorm = 119.1955, GNorm = 0.5351, lr_0 = 8.4191e-04
Loss = 1.1086e-01, PNorm = 119.2648, GNorm = 0.6826, lr_0 = 8.4134e-04
Loss = 1.5392e-01, PNorm = 119.3446, GNorm = 0.7887, lr_0 = 8.4076e-04
Loss = 1.0826e-01, PNorm = 119.4145, GNorm = 0.5975, lr_0 = 8.4019e-04
Loss = 1.0293e-01, PNorm = 119.4916, GNorm = 0.8934, lr_0 = 8.3961e-04
Loss = 1.2290e-01, PNorm = 119.5612, GNorm = 0.7371, lr_0 = 8.3903e-04
Loss = 1.0110e-01, PNorm = 119.6322, GNorm = 0.8458, lr_0 = 8.3846e-04
Loss = 1.1464e-01, PNorm = 119.6969, GNorm = 0.5329, lr_0 = 8.3789e-04
Loss = 9.7259e-02, PNorm = 119.7649, GNorm = 0.7638, lr_0 = 8.3731e-04
Loss = 8.8814e-02, PNorm = 119.8312, GNorm = 0.7469, lr_0 = 8.3674e-04
Loss = 1.1896e-01, PNorm = 119.8988, GNorm = 0.9262, lr_0 = 8.3616e-04
Loss = 1.0553e-01, PNorm = 119.9702, GNorm = 0.6903, lr_0 = 8.3559e-04
Loss = 1.2329e-01, PNorm = 120.0448, GNorm = 1.1104, lr_0 = 8.3502e-04
Loss = 1.0490e-01, PNorm = 120.1209, GNorm = 0.5060, lr_0 = 8.3445e-04
Loss = 1.1676e-01, PNorm = 120.1877, GNorm = 0.7937, lr_0 = 8.3388e-04
Loss = 1.2576e-01, PNorm = 120.2662, GNorm = 1.0805, lr_0 = 8.3330e-04
Loss = 1.2232e-01, PNorm = 120.3457, GNorm = 0.7087, lr_0 = 8.3273e-04
Loss = 1.0641e-01, PNorm = 120.4225, GNorm = 0.5686, lr_0 = 8.3216e-04
Loss = 1.2850e-01, PNorm = 120.5028, GNorm = 1.1496, lr_0 = 8.3159e-04
Loss = 1.4191e-01, PNorm = 120.5909, GNorm = 0.7031, lr_0 = 8.3102e-04
Loss = 1.1920e-01, PNorm = 120.6804, GNorm = 0.5621, lr_0 = 8.3045e-04
Loss = 1.1690e-01, PNorm = 120.7676, GNorm = 0.5679, lr_0 = 8.2988e-04
Loss = 1.3652e-01, PNorm = 120.8473, GNorm = 0.6560, lr_0 = 8.2932e-04
Loss = 1.2988e-01, PNorm = 120.9386, GNorm = 0.9870, lr_0 = 8.2875e-04
Loss = 1.3022e-01, PNorm = 121.0302, GNorm = 0.8378, lr_0 = 8.2818e-04
Loss = 1.3014e-01, PNorm = 121.1115, GNorm = 0.7022, lr_0 = 8.2761e-04
Loss = 1.2322e-01, PNorm = 121.2010, GNorm = 0.6273, lr_0 = 8.2705e-04
Loss = 1.3627e-01, PNorm = 121.2799, GNorm = 0.5553, lr_0 = 8.2648e-04
Loss = 1.2070e-01, PNorm = 121.3661, GNorm = 0.6996, lr_0 = 8.2591e-04
Loss = 1.3709e-01, PNorm = 121.4533, GNorm = 0.9001, lr_0 = 8.2535e-04
Loss = 1.2976e-01, PNorm = 121.5472, GNorm = 1.3655, lr_0 = 8.2478e-04
Loss = 1.1859e-01, PNorm = 121.6488, GNorm = 0.7304, lr_0 = 8.2422e-04
Loss = 1.2735e-01, PNorm = 121.7374, GNorm = 0.5190, lr_0 = 8.2365e-04
Loss = 1.1372e-01, PNorm = 121.8273, GNorm = 0.7163, lr_0 = 8.2309e-04
Loss = 1.2583e-01, PNorm = 121.9049, GNorm = 0.9800, lr_0 = 8.2252e-04
Loss = 1.0788e-01, PNorm = 121.9881, GNorm = 0.9465, lr_0 = 8.2196e-04
Loss = 1.7035e-01, PNorm = 122.0853, GNorm = 0.8539, lr_0 = 8.2140e-04
Loss = 1.2220e-01, PNorm = 122.1912, GNorm = 0.7144, lr_0 = 8.2084e-04
Loss = 1.0788e-01, PNorm = 122.2788, GNorm = 0.4951, lr_0 = 8.2027e-04
Loss = 1.1436e-01, PNorm = 122.3638, GNorm = 0.8732, lr_0 = 8.1971e-04
Loss = 1.3978e-01, PNorm = 122.4460, GNorm = 0.7117, lr_0 = 8.1915e-04
Loss = 1.1798e-01, PNorm = 122.5339, GNorm = 0.6017, lr_0 = 8.1859e-04
Loss = 1.3462e-01, PNorm = 122.6260, GNorm = 0.7556, lr_0 = 8.1803e-04
Loss = 1.1843e-01, PNorm = 122.7220, GNorm = 0.8190, lr_0 = 8.1747e-04
Loss = 1.3634e-01, PNorm = 122.8088, GNorm = 0.4661, lr_0 = 8.1691e-04
Loss = 1.0913e-01, PNorm = 122.9003, GNorm = 0.6273, lr_0 = 8.1635e-04
Loss = 1.0635e-01, PNorm = 122.9845, GNorm = 0.5899, lr_0 = 8.1579e-04
Loss = 1.2896e-01, PNorm = 123.0788, GNorm = 0.6308, lr_0 = 8.1523e-04
Loss = 1.3433e-01, PNorm = 123.1617, GNorm = 0.5712, lr_0 = 8.1467e-04
Loss = 1.2249e-01, PNorm = 123.2418, GNorm = 0.5125, lr_0 = 8.1411e-04
Loss = 1.5822e-01, PNorm = 123.3307, GNorm = 0.7774, lr_0 = 8.1355e-04
Loss = 1.3238e-01, PNorm = 123.4197, GNorm = 0.6905, lr_0 = 8.1300e-04
Loss = 1.3899e-01, PNorm = 123.5082, GNorm = 0.6711, lr_0 = 8.1244e-04
Loss = 1.2347e-01, PNorm = 123.5952, GNorm = 0.9093, lr_0 = 8.1188e-04
Loss = 1.3825e-01, PNorm = 123.6875, GNorm = 0.5719, lr_0 = 8.1133e-04
Loss = 1.4540e-01, PNorm = 123.7843, GNorm = 0.6454, lr_0 = 8.1077e-04
Loss = 1.5340e-01, PNorm = 123.8867, GNorm = 0.9094, lr_0 = 8.1022e-04
Loss = 1.3367e-01, PNorm = 123.9922, GNorm = 0.7827, lr_0 = 8.0966e-04
Loss = 1.3266e-01, PNorm = 124.0839, GNorm = 0.6179, lr_0 = 8.0911e-04
Loss = 1.3283e-01, PNorm = 124.1759, GNorm = 1.1622, lr_0 = 8.0855e-04
Loss = 1.4212e-01, PNorm = 124.2668, GNorm = 0.8379, lr_0 = 8.0800e-04
Loss = 1.3876e-01, PNorm = 124.3645, GNorm = 0.5956, lr_0 = 8.0745e-04
Loss = 1.4785e-01, PNorm = 124.4657, GNorm = 0.7240, lr_0 = 8.0689e-04
Loss = 1.2679e-01, PNorm = 124.5593, GNorm = 0.6291, lr_0 = 8.0634e-04
Loss = 1.5336e-01, PNorm = 124.6551, GNorm = 1.2259, lr_0 = 8.0579e-04
Loss = 1.3679e-01, PNorm = 124.7605, GNorm = 0.5210, lr_0 = 8.0523e-04
Loss = 1.3974e-01, PNorm = 124.8639, GNorm = 0.7654, lr_0 = 8.0468e-04
Loss = 1.2839e-01, PNorm = 124.9689, GNorm = 0.8931, lr_0 = 8.0413e-04
Loss = 1.3041e-01, PNorm = 125.0702, GNorm = 0.7505, lr_0 = 8.0358e-04
Loss = 1.3367e-01, PNorm = 125.1639, GNorm = 0.9167, lr_0 = 8.0303e-04
Loss = 1.4022e-01, PNorm = 125.2604, GNorm = 0.7393, lr_0 = 8.0248e-04
Loss = 2.0199e-01, PNorm = 125.3519, GNorm = 0.5969, lr_0 = 8.0193e-04
Loss = 1.6481e-01, PNorm = 125.4547, GNorm = 0.8909, lr_0 = 8.0138e-04
Loss = 1.3013e-01, PNorm = 125.5511, GNorm = 1.2428, lr_0 = 8.0083e-04
Loss = 1.5067e-01, PNorm = 125.6506, GNorm = 0.8077, lr_0 = 8.0028e-04
Loss = 1.4071e-01, PNorm = 125.7528, GNorm = 0.9996, lr_0 = 7.9974e-04
Loss = 1.4918e-01, PNorm = 125.8563, GNorm = 0.7855, lr_0 = 7.9919e-04
Loss = 1.4509e-01, PNorm = 125.9573, GNorm = 1.2353, lr_0 = 7.9864e-04
Loss = 1.2103e-01, PNorm = 126.0647, GNorm = 0.9478, lr_0 = 7.9809e-04
Loss = 1.4276e-01, PNorm = 126.1695, GNorm = 1.0315, lr_0 = 7.9755e-04
Loss = 1.4672e-01, PNorm = 126.2730, GNorm = 0.8618, lr_0 = 7.9700e-04
Loss = 1.2601e-01, PNorm = 126.3707, GNorm = 0.9205, lr_0 = 7.9645e-04
Loss = 1.4997e-01, PNorm = 126.4644, GNorm = 0.7909, lr_0 = 7.9591e-04
Loss = 1.6216e-01, PNorm = 126.5646, GNorm = 0.8069, lr_0 = 7.9536e-04
Loss = 1.5509e-01, PNorm = 126.6684, GNorm = 0.8016, lr_0 = 7.9482e-04
Loss = 1.3673e-01, PNorm = 126.7833, GNorm = 0.6901, lr_0 = 7.9427e-04
Loss = 1.7039e-01, PNorm = 126.8993, GNorm = 0.8658, lr_0 = 7.9373e-04
Loss = 1.2986e-01, PNorm = 127.0129, GNorm = 0.7505, lr_0 = 7.9319e-04
Loss = 1.4450e-01, PNorm = 127.1222, GNorm = 0.7735, lr_0 = 7.9264e-04
Loss = 1.5731e-01, PNorm = 127.2402, GNorm = 0.7429, lr_0 = 7.9210e-04
Loss = 1.3462e-01, PNorm = 127.3426, GNorm = 0.8637, lr_0 = 7.9156e-04
Loss = 1.3250e-01, PNorm = 127.4509, GNorm = 0.7219, lr_0 = 7.9101e-04
Loss = 1.5412e-01, PNorm = 127.5472, GNorm = 0.8858, lr_0 = 7.9047e-04
Loss = 1.2258e-01, PNorm = 127.6513, GNorm = 1.0166, lr_0 = 7.8993e-04
Loss = 1.4442e-01, PNorm = 127.7584, GNorm = 0.5401, lr_0 = 7.8939e-04
Loss = 1.6370e-01, PNorm = 127.8597, GNorm = 1.0867, lr_0 = 7.8885e-04
Loss = 1.3685e-01, PNorm = 127.9687, GNorm = 0.9455, lr_0 = 7.8831e-04
Loss = 1.3790e-01, PNorm = 128.0641, GNorm = 0.6076, lr_0 = 7.8777e-04
Loss = 1.7054e-01, PNorm = 128.1659, GNorm = 1.5009, lr_0 = 7.8723e-04
Loss = 1.4173e-01, PNorm = 128.2690, GNorm = 0.8946, lr_0 = 7.8669e-04
Loss = 1.4031e-01, PNorm = 128.3695, GNorm = 0.8411, lr_0 = 7.8615e-04
Loss = 1.2888e-01, PNorm = 128.4815, GNorm = 0.6134, lr_0 = 7.8561e-04
Loss = 1.6485e-01, PNorm = 128.5840, GNorm = 0.6829, lr_0 = 7.8507e-04
Loss = 1.5512e-01, PNorm = 128.6885, GNorm = 0.9848, lr_0 = 7.8454e-04
Loss = 1.4863e-01, PNorm = 128.7836, GNorm = 0.5860, lr_0 = 7.8400e-04
Loss = 1.4335e-01, PNorm = 128.8819, GNorm = 0.7379, lr_0 = 7.8346e-04
Loss = 1.5019e-01, PNorm = 128.9844, GNorm = 0.6039, lr_0 = 7.8293e-04
Loss = 1.4254e-01, PNorm = 129.0938, GNorm = 0.9771, lr_0 = 7.8239e-04
Loss = 1.6724e-01, PNorm = 129.2035, GNorm = 0.8398, lr_0 = 7.8185e-04
Loss = 1.3501e-01, PNorm = 129.3165, GNorm = 0.7154, lr_0 = 7.8132e-04
Validation mae = 0.125023
Epoch 5
Loss = 8.6998e-02, PNorm = 129.4133, GNorm = 0.7205, lr_0 = 7.8078e-04
Loss = 9.8019e-02, PNorm = 129.4882, GNorm = 0.6161, lr_0 = 7.8025e-04
Loss = 9.5600e-02, PNorm = 129.5585, GNorm = 0.5020, lr_0 = 7.7971e-04
Loss = 1.0902e-01, PNorm = 129.6264, GNorm = 1.2073, lr_0 = 7.7918e-04
Loss = 7.4777e-02, PNorm = 129.6868, GNorm = 0.6161, lr_0 = 7.7864e-04
Loss = 9.3684e-02, PNorm = 129.7489, GNorm = 0.4552, lr_0 = 7.7811e-04
Loss = 8.4133e-02, PNorm = 129.8075, GNorm = 0.3966, lr_0 = 7.7758e-04
Loss = 7.6945e-02, PNorm = 129.8617, GNorm = 0.5819, lr_0 = 7.7705e-04
Loss = 8.3263e-02, PNorm = 129.9113, GNorm = 0.8390, lr_0 = 7.7651e-04
Loss = 7.8427e-02, PNorm = 129.9607, GNorm = 0.7150, lr_0 = 7.7598e-04
Loss = 8.1233e-02, PNorm = 130.0219, GNorm = 0.5583, lr_0 = 7.7545e-04
Loss = 8.3323e-02, PNorm = 130.0680, GNorm = 0.6718, lr_0 = 7.7492e-04
Loss = 8.2130e-02, PNorm = 130.1233, GNorm = 0.3455, lr_0 = 7.7439e-04
Loss = 8.8184e-02, PNorm = 130.1784, GNorm = 0.6039, lr_0 = 7.7386e-04
Loss = 9.0047e-02, PNorm = 130.2407, GNorm = 0.7372, lr_0 = 7.7333e-04
Loss = 6.9449e-02, PNorm = 130.2958, GNorm = 0.5366, lr_0 = 7.7280e-04
Loss = 6.6828e-02, PNorm = 130.3579, GNorm = 0.5213, lr_0 = 7.7227e-04
Loss = 8.3663e-02, PNorm = 130.4149, GNorm = 0.5008, lr_0 = 7.7174e-04
Loss = 8.5831e-02, PNorm = 130.4790, GNorm = 0.5138, lr_0 = 7.7121e-04
Loss = 8.5376e-02, PNorm = 130.5522, GNorm = 0.6302, lr_0 = 7.7068e-04
Loss = 8.2289e-02, PNorm = 130.6158, GNorm = 0.5588, lr_0 = 7.7015e-04
Loss = 7.9144e-02, PNorm = 130.6784, GNorm = 0.5977, lr_0 = 7.6963e-04
Loss = 7.6396e-02, PNorm = 130.7422, GNorm = 0.7758, lr_0 = 7.6910e-04
Loss = 9.6155e-02, PNorm = 130.8028, GNorm = 0.9305, lr_0 = 7.6857e-04
Loss = 8.5057e-02, PNorm = 130.8645, GNorm = 0.6617, lr_0 = 7.6805e-04
Loss = 8.1747e-02, PNorm = 130.9292, GNorm = 0.7445, lr_0 = 7.6752e-04
Loss = 9.6092e-02, PNorm = 130.9956, GNorm = 0.4402, lr_0 = 7.6699e-04
Loss = 7.9952e-02, PNorm = 131.0623, GNorm = 1.5327, lr_0 = 7.6647e-04
Loss = 8.9945e-02, PNorm = 131.1239, GNorm = 1.7601, lr_0 = 7.6594e-04
Loss = 8.7232e-02, PNorm = 131.1753, GNorm = 0.8123, lr_0 = 7.6542e-04
Loss = 8.5704e-02, PNorm = 131.2353, GNorm = 0.6862, lr_0 = 7.6489e-04
Loss = 1.1774e-01, PNorm = 131.3001, GNorm = 0.7070, lr_0 = 7.6437e-04
Loss = 8.3278e-02, PNorm = 131.3734, GNorm = 0.5008, lr_0 = 7.6385e-04
Loss = 9.2045e-02, PNorm = 131.4288, GNorm = 0.5608, lr_0 = 7.6332e-04
Loss = 6.0660e-02, PNorm = 131.4864, GNorm = 0.5023, lr_0 = 7.6280e-04
Loss = 7.7563e-02, PNorm = 131.5383, GNorm = 0.4264, lr_0 = 7.6228e-04
Loss = 8.4370e-02, PNorm = 131.5989, GNorm = 0.6394, lr_0 = 7.6176e-04
Loss = 9.3269e-02, PNorm = 131.6604, GNorm = 0.5638, lr_0 = 7.6123e-04
Loss = 8.5560e-02, PNorm = 131.7334, GNorm = 0.7418, lr_0 = 7.6071e-04
Loss = 8.0808e-02, PNorm = 131.7995, GNorm = 0.9012, lr_0 = 7.6019e-04
Loss = 8.7241e-02, PNorm = 131.8579, GNorm = 0.5459, lr_0 = 7.5967e-04
Loss = 1.0185e-01, PNorm = 131.9377, GNorm = 0.4720, lr_0 = 7.5915e-04
Loss = 8.5433e-02, PNorm = 132.0013, GNorm = 1.0881, lr_0 = 7.5863e-04
Loss = 9.1914e-02, PNorm = 132.0815, GNorm = 0.6545, lr_0 = 7.5811e-04
Loss = 1.0238e-01, PNorm = 132.1522, GNorm = 0.5473, lr_0 = 7.5759e-04
Loss = 8.8101e-02, PNorm = 132.2347, GNorm = 0.7393, lr_0 = 7.5707e-04
Loss = 8.0729e-02, PNorm = 132.3084, GNorm = 0.6704, lr_0 = 7.5655e-04
Loss = 7.3260e-02, PNorm = 132.3835, GNorm = 0.4231, lr_0 = 7.5603e-04
Loss = 9.1081e-02, PNorm = 132.4565, GNorm = 0.6196, lr_0 = 7.5552e-04
Loss = 9.3009e-02, PNorm = 132.5318, GNorm = 0.7942, lr_0 = 7.5500e-04
Loss = 8.9169e-02, PNorm = 132.6008, GNorm = 0.4795, lr_0 = 7.5448e-04
Loss = 7.9526e-02, PNorm = 132.6606, GNorm = 0.5084, lr_0 = 7.5397e-04
Loss = 9.3192e-02, PNorm = 132.7411, GNorm = 0.8041, lr_0 = 7.5345e-04
Loss = 8.7121e-02, PNorm = 132.8091, GNorm = 0.5996, lr_0 = 7.5293e-04
Loss = 9.0230e-02, PNorm = 132.8857, GNorm = 0.4764, lr_0 = 7.5242e-04
Loss = 1.0217e-01, PNorm = 132.9565, GNorm = 0.6248, lr_0 = 7.5190e-04
Loss = 7.1013e-02, PNorm = 133.0288, GNorm = 0.4479, lr_0 = 7.5139e-04
Loss = 7.9222e-02, PNorm = 133.0923, GNorm = 0.4628, lr_0 = 7.5087e-04
Loss = 8.7815e-02, PNorm = 133.1605, GNorm = 0.4282, lr_0 = 7.5036e-04
Loss = 8.8685e-02, PNorm = 133.2299, GNorm = 0.9614, lr_0 = 7.4984e-04
Loss = 9.5742e-02, PNorm = 133.3135, GNorm = 0.5431, lr_0 = 7.4933e-04
Loss = 8.8541e-02, PNorm = 133.3914, GNorm = 0.5020, lr_0 = 7.4882e-04
Loss = 7.1518e-02, PNorm = 133.4657, GNorm = 0.4531, lr_0 = 7.4830e-04
Loss = 9.5213e-02, PNorm = 133.5266, GNorm = 0.8231, lr_0 = 7.4779e-04
Loss = 7.8854e-02, PNorm = 133.6073, GNorm = 0.5579, lr_0 = 7.4728e-04
Loss = 9.5966e-02, PNorm = 133.6782, GNorm = 0.4935, lr_0 = 7.4677e-04
Loss = 8.2097e-02, PNorm = 133.7575, GNorm = 0.7100, lr_0 = 7.4625e-04
Loss = 9.3997e-02, PNorm = 133.8332, GNorm = 0.9228, lr_0 = 7.4574e-04
Loss = 8.8083e-02, PNorm = 133.9090, GNorm = 0.6054, lr_0 = 7.4523e-04
Loss = 8.3604e-02, PNorm = 133.9886, GNorm = 0.5992, lr_0 = 7.4472e-04
Loss = 8.6556e-02, PNorm = 134.0609, GNorm = 0.5774, lr_0 = 7.4421e-04
Loss = 1.0696e-01, PNorm = 134.1272, GNorm = 1.1952, lr_0 = 7.4370e-04
Loss = 8.3400e-02, PNorm = 134.2006, GNorm = 1.6720, lr_0 = 7.4319e-04
Loss = 1.0309e-01, PNorm = 134.2869, GNorm = 0.4784, lr_0 = 7.4268e-04
Loss = 8.0412e-02, PNorm = 134.3711, GNorm = 0.6702, lr_0 = 7.4217e-04
Loss = 9.1908e-02, PNorm = 134.4537, GNorm = 0.7885, lr_0 = 7.4167e-04
Loss = 8.3873e-02, PNorm = 134.5413, GNorm = 0.8245, lr_0 = 7.4116e-04
Loss = 8.3482e-02, PNorm = 134.6153, GNorm = 0.7522, lr_0 = 7.4065e-04
Loss = 7.6450e-02, PNorm = 134.6889, GNorm = 0.5279, lr_0 = 7.4014e-04
Loss = 9.1465e-02, PNorm = 134.7610, GNorm = 0.8229, lr_0 = 7.3964e-04
Loss = 1.0665e-01, PNorm = 134.8380, GNorm = 0.7636, lr_0 = 7.3913e-04
Loss = 8.1900e-02, PNorm = 134.9197, GNorm = 1.1196, lr_0 = 7.3862e-04
Loss = 9.2427e-02, PNorm = 135.0036, GNorm = 0.7236, lr_0 = 7.3812e-04
Loss = 9.3752e-02, PNorm = 135.0744, GNorm = 0.6835, lr_0 = 7.3761e-04
Loss = 1.0591e-01, PNorm = 135.1584, GNorm = 0.7526, lr_0 = 7.3711e-04
Loss = 1.0537e-01, PNorm = 135.2466, GNorm = 0.5662, lr_0 = 7.3660e-04
Loss = 9.7574e-02, PNorm = 135.3417, GNorm = 0.6580, lr_0 = 7.3610e-04
Loss = 8.8151e-02, PNorm = 135.4274, GNorm = 0.6568, lr_0 = 7.3559e-04
Loss = 1.0407e-01, PNorm = 135.5103, GNorm = 0.4869, lr_0 = 7.3509e-04
Loss = 9.8432e-02, PNorm = 135.5961, GNorm = 0.5638, lr_0 = 7.3458e-04
Loss = 9.7964e-02, PNorm = 135.6774, GNorm = 1.4465, lr_0 = 7.3408e-04
Loss = 9.0862e-02, PNorm = 135.7646, GNorm = 0.5918, lr_0 = 7.3358e-04
Loss = 1.0203e-01, PNorm = 135.8460, GNorm = 0.7012, lr_0 = 7.3308e-04
Loss = 8.4866e-02, PNorm = 135.9303, GNorm = 0.4742, lr_0 = 7.3257e-04
Loss = 1.0781e-01, PNorm = 136.0009, GNorm = 0.6806, lr_0 = 7.3207e-04
Loss = 1.1062e-01, PNorm = 136.0875, GNorm = 0.9600, lr_0 = 7.3157e-04
Loss = 8.3971e-02, PNorm = 136.1699, GNorm = 0.4207, lr_0 = 7.3107e-04
Loss = 9.2325e-02, PNorm = 136.2522, GNorm = 0.5305, lr_0 = 7.3057e-04
Loss = 8.2336e-02, PNorm = 136.3329, GNorm = 0.7881, lr_0 = 7.3007e-04
Loss = 1.3237e-01, PNorm = 136.4199, GNorm = 0.6573, lr_0 = 7.2957e-04
Loss = 8.5328e-02, PNorm = 136.4986, GNorm = 0.4587, lr_0 = 7.2907e-04
Loss = 9.5739e-02, PNorm = 136.5825, GNorm = 0.6819, lr_0 = 7.2857e-04
Loss = 8.6606e-02, PNorm = 136.6613, GNorm = 0.6232, lr_0 = 7.2807e-04
Loss = 8.5506e-02, PNorm = 136.7377, GNorm = 0.5535, lr_0 = 7.2757e-04
Loss = 7.8584e-02, PNorm = 136.8149, GNorm = 0.8384, lr_0 = 7.2707e-04
Loss = 1.0281e-01, PNorm = 136.8889, GNorm = 0.9029, lr_0 = 7.2657e-04
Loss = 1.3802e-01, PNorm = 136.9723, GNorm = 1.2605, lr_0 = 7.2608e-04
Loss = 9.6058e-02, PNorm = 137.0622, GNorm = 0.6461, lr_0 = 7.2558e-04
Loss = 8.3385e-02, PNorm = 137.1491, GNorm = 0.4929, lr_0 = 7.2508e-04
Loss = 9.5561e-02, PNorm = 137.2355, GNorm = 0.7679, lr_0 = 7.2458e-04
Loss = 1.0185e-01, PNorm = 137.3225, GNorm = 0.6055, lr_0 = 7.2409e-04
Loss = 1.0244e-01, PNorm = 137.4040, GNorm = 0.6098, lr_0 = 7.2359e-04
Loss = 1.0022e-01, PNorm = 137.4889, GNorm = 0.8751, lr_0 = 7.2310e-04
Loss = 8.9906e-02, PNorm = 137.5756, GNorm = 0.5510, lr_0 = 7.2260e-04
Loss = 1.2274e-01, PNorm = 137.6670, GNorm = 0.5803, lr_0 = 7.2211e-04
Loss = 9.8465e-02, PNorm = 137.7580, GNorm = 0.5778, lr_0 = 7.2161e-04
Loss = 1.1983e-01, PNorm = 137.8481, GNorm = 0.6401, lr_0 = 7.2112e-04
Loss = 1.0310e-01, PNorm = 137.9386, GNorm = 0.9378, lr_0 = 7.2062e-04
Loss = 9.8027e-02, PNorm = 138.0313, GNorm = 1.0808, lr_0 = 7.2013e-04
Loss = 9.2461e-02, PNorm = 138.1161, GNorm = 0.6916, lr_0 = 7.1964e-04
Validation mae = 0.123750
Epoch 6
Loss = 7.1232e-02, PNorm = 138.1945, GNorm = 0.4409, lr_0 = 7.1914e-04
Loss = 6.5048e-02, PNorm = 138.2590, GNorm = 0.4346, lr_0 = 7.1865e-04
Loss = 6.7584e-02, PNorm = 138.3116, GNorm = 0.4974, lr_0 = 7.1816e-04
Loss = 6.0516e-02, PNorm = 138.3655, GNorm = 1.0936, lr_0 = 7.1767e-04
Loss = 6.9937e-02, PNorm = 138.4176, GNorm = 0.3550, lr_0 = 7.1717e-04
Loss = 6.8900e-02, PNorm = 138.4711, GNorm = 0.4913, lr_0 = 7.1668e-04
Loss = 6.8852e-02, PNorm = 138.5306, GNorm = 0.3719, lr_0 = 7.1619e-04
Loss = 6.5517e-02, PNorm = 138.5832, GNorm = 1.1203, lr_0 = 7.1570e-04
Loss = 6.1292e-02, PNorm = 138.6418, GNorm = 0.4296, lr_0 = 7.1521e-04
Loss = 6.1374e-02, PNorm = 138.6928, GNorm = 0.7986, lr_0 = 7.1472e-04
Loss = 6.7141e-02, PNorm = 138.7472, GNorm = 0.4617, lr_0 = 7.1423e-04
Loss = 7.0492e-02, PNorm = 138.7967, GNorm = 0.5346, lr_0 = 7.1374e-04
Loss = 7.2134e-02, PNorm = 138.8498, GNorm = 0.4337, lr_0 = 7.1325e-04
Loss = 5.5184e-02, PNorm = 138.8992, GNorm = 0.6272, lr_0 = 7.1277e-04
Loss = 5.5546e-02, PNorm = 138.9483, GNorm = 0.4241, lr_0 = 7.1228e-04
Loss = 6.0399e-02, PNorm = 138.9985, GNorm = 0.3173, lr_0 = 7.1179e-04
Loss = 5.7326e-02, PNorm = 139.0502, GNorm = 0.5514, lr_0 = 7.1130e-04
Loss = 5.6454e-02, PNorm = 139.1073, GNorm = 0.4513, lr_0 = 7.1081e-04
Loss = 5.8004e-02, PNorm = 139.1554, GNorm = 0.6029, lr_0 = 7.1033e-04
Loss = 5.5942e-02, PNorm = 139.2078, GNorm = 0.5063, lr_0 = 7.0984e-04
Loss = 6.3965e-02, PNorm = 139.2586, GNorm = 0.3236, lr_0 = 7.0935e-04
Loss = 6.6821e-02, PNorm = 139.3101, GNorm = 0.4971, lr_0 = 7.0887e-04
Loss = 6.3325e-02, PNorm = 139.3662, GNorm = 0.4698, lr_0 = 7.0838e-04
Loss = 5.7853e-02, PNorm = 139.4164, GNorm = 0.3015, lr_0 = 7.0790e-04
Loss = 6.1427e-02, PNorm = 139.4705, GNorm = 0.6935, lr_0 = 7.0741e-04
Loss = 6.0648e-02, PNorm = 139.5236, GNorm = 0.7578, lr_0 = 7.0693e-04
Loss = 6.0766e-02, PNorm = 139.5807, GNorm = 0.6364, lr_0 = 7.0644e-04
Loss = 6.4374e-02, PNorm = 139.6332, GNorm = 0.5936, lr_0 = 7.0596e-04
Loss = 6.1889e-02, PNorm = 139.6917, GNorm = 0.8315, lr_0 = 7.0548e-04
Loss = 5.7697e-02, PNorm = 139.7462, GNorm = 0.6124, lr_0 = 7.0499e-04
Loss = 5.4475e-02, PNorm = 139.8033, GNorm = 0.7348, lr_0 = 7.0451e-04
Loss = 5.8321e-02, PNorm = 139.8533, GNorm = 0.3898, lr_0 = 7.0403e-04
Loss = 6.8235e-02, PNorm = 139.8947, GNorm = 1.0066, lr_0 = 7.0354e-04
Loss = 7.9806e-02, PNorm = 139.9473, GNorm = 0.6612, lr_0 = 7.0306e-04
Loss = 7.9310e-02, PNorm = 140.0114, GNorm = 0.5642, lr_0 = 7.0258e-04
Loss = 6.4879e-02, PNorm = 140.0724, GNorm = 0.5493, lr_0 = 7.0210e-04
Loss = 7.8675e-02, PNorm = 140.1298, GNorm = 0.7028, lr_0 = 7.0162e-04
Loss = 6.1132e-02, PNorm = 140.1930, GNorm = 0.4579, lr_0 = 7.0114e-04
Loss = 5.4127e-02, PNorm = 140.2448, GNorm = 0.4640, lr_0 = 7.0066e-04
Loss = 5.7096e-02, PNorm = 140.3062, GNorm = 0.6147, lr_0 = 7.0018e-04
Loss = 5.6542e-02, PNorm = 140.3624, GNorm = 0.4086, lr_0 = 6.9970e-04
Loss = 5.5700e-02, PNorm = 140.4140, GNorm = 0.7253, lr_0 = 6.9922e-04
Loss = 5.5467e-02, PNorm = 140.4671, GNorm = 0.3229, lr_0 = 6.9874e-04
Loss = 6.3605e-02, PNorm = 140.5236, GNorm = 0.4033, lr_0 = 6.9826e-04
Loss = 6.1190e-02, PNorm = 140.5799, GNorm = 0.5721, lr_0 = 6.9778e-04
Loss = 6.3796e-02, PNorm = 140.6409, GNorm = 0.4663, lr_0 = 6.9730e-04
Loss = 6.7472e-02, PNorm = 140.7059, GNorm = 0.6996, lr_0 = 6.9683e-04
Loss = 5.8016e-02, PNorm = 140.7686, GNorm = 0.4205, lr_0 = 6.9635e-04
Loss = 5.6730e-02, PNorm = 140.8295, GNorm = 0.4762, lr_0 = 6.9587e-04
Loss = 6.1433e-02, PNorm = 140.8848, GNorm = 0.6143, lr_0 = 6.9540e-04
Loss = 5.5144e-02, PNorm = 140.9402, GNorm = 0.5596, lr_0 = 6.9492e-04
Loss = 7.4102e-02, PNorm = 141.0024, GNorm = 0.7308, lr_0 = 6.9444e-04
Loss = 5.6159e-02, PNorm = 141.0650, GNorm = 0.5239, lr_0 = 6.9397e-04
Loss = 5.9125e-02, PNorm = 141.1235, GNorm = 0.5697, lr_0 = 6.9349e-04
Loss = 8.0018e-02, PNorm = 141.1810, GNorm = 0.5796, lr_0 = 6.9302e-04
Loss = 6.7764e-02, PNorm = 141.2445, GNorm = 0.5919, lr_0 = 6.9254e-04
Loss = 6.0058e-02, PNorm = 141.3024, GNorm = 0.4259, lr_0 = 6.9207e-04
Loss = 6.8242e-02, PNorm = 141.3657, GNorm = 0.4706, lr_0 = 6.9159e-04
Loss = 6.3775e-02, PNorm = 141.4300, GNorm = 0.4767, lr_0 = 6.9112e-04
Loss = 6.1552e-02, PNorm = 141.4976, GNorm = 0.4851, lr_0 = 6.9065e-04
Loss = 7.8370e-02, PNorm = 141.5652, GNorm = 0.6493, lr_0 = 6.9017e-04
Loss = 6.8930e-02, PNorm = 141.6260, GNorm = 0.4381, lr_0 = 6.8970e-04
Loss = 5.7789e-02, PNorm = 141.6920, GNorm = 1.3713, lr_0 = 6.8923e-04
Loss = 6.6697e-02, PNorm = 141.7517, GNorm = 0.7255, lr_0 = 6.8876e-04
Loss = 8.3644e-02, PNorm = 141.8208, GNorm = 0.7641, lr_0 = 6.8828e-04
Loss = 6.3288e-02, PNorm = 141.8848, GNorm = 1.2375, lr_0 = 6.8781e-04
Loss = 5.4135e-02, PNorm = 141.9543, GNorm = 0.4558, lr_0 = 6.8734e-04
Loss = 7.1854e-02, PNorm = 142.0244, GNorm = 0.5053, lr_0 = 6.8687e-04
Loss = 7.6844e-02, PNorm = 142.0980, GNorm = 0.8592, lr_0 = 6.8640e-04
Loss = 5.7498e-02, PNorm = 142.1715, GNorm = 0.5554, lr_0 = 6.8593e-04
Loss = 7.6339e-02, PNorm = 142.2383, GNorm = 0.6251, lr_0 = 6.8546e-04
Loss = 6.0897e-02, PNorm = 142.3050, GNorm = 0.5211, lr_0 = 6.8499e-04
Loss = 6.0967e-02, PNorm = 142.3674, GNorm = 0.7488, lr_0 = 6.8452e-04
Loss = 7.2855e-02, PNorm = 142.4345, GNorm = 0.5679, lr_0 = 6.8405e-04
Loss = 6.0933e-02, PNorm = 142.4933, GNorm = 0.6155, lr_0 = 6.8358e-04
Loss = 6.9596e-02, PNorm = 142.5567, GNorm = 1.2206, lr_0 = 6.8312e-04
Loss = 6.6853e-02, PNorm = 142.6219, GNorm = 0.7569, lr_0 = 6.8265e-04
Loss = 6.0978e-02, PNorm = 142.6873, GNorm = 0.8678, lr_0 = 6.8218e-04
Loss = 6.7556e-02, PNorm = 142.7479, GNorm = 0.5009, lr_0 = 6.8171e-04
Loss = 6.7452e-02, PNorm = 142.8185, GNorm = 0.3998, lr_0 = 6.8125e-04
Loss = 8.1481e-02, PNorm = 142.8828, GNorm = 0.6851, lr_0 = 6.8078e-04
Loss = 7.1813e-02, PNorm = 142.9600, GNorm = 0.9625, lr_0 = 6.8031e-04
Loss = 7.0547e-02, PNorm = 143.0283, GNorm = 0.5209, lr_0 = 6.7985e-04
Loss = 7.1214e-02, PNorm = 143.1008, GNorm = 0.4630, lr_0 = 6.7938e-04
Loss = 6.0221e-02, PNorm = 143.1682, GNorm = 0.4192, lr_0 = 6.7892e-04
Loss = 7.3284e-02, PNorm = 143.2297, GNorm = 0.7132, lr_0 = 6.7845e-04
Loss = 6.3558e-02, PNorm = 143.2944, GNorm = 0.4196, lr_0 = 6.7799e-04
Loss = 6.8264e-02, PNorm = 143.3537, GNorm = 0.6771, lr_0 = 6.7752e-04
Loss = 7.5898e-02, PNorm = 143.4213, GNorm = 0.5048, lr_0 = 6.7706e-04
Loss = 6.1077e-02, PNorm = 143.4847, GNorm = 0.8176, lr_0 = 6.7659e-04
Loss = 6.6811e-02, PNorm = 143.5468, GNorm = 0.6084, lr_0 = 6.7613e-04
Loss = 6.6872e-02, PNorm = 143.6170, GNorm = 0.4020, lr_0 = 6.7567e-04
Loss = 7.9002e-02, PNorm = 143.6860, GNorm = 0.3707, lr_0 = 6.7520e-04
Loss = 6.5684e-02, PNorm = 143.7586, GNorm = 0.8014, lr_0 = 6.7474e-04
Loss = 7.2865e-02, PNorm = 143.8327, GNorm = 0.7791, lr_0 = 6.7428e-04
Loss = 7.0595e-02, PNorm = 143.9020, GNorm = 0.9766, lr_0 = 6.7382e-04
Loss = 5.4659e-02, PNorm = 143.9711, GNorm = 0.4632, lr_0 = 6.7335e-04
Loss = 9.0474e-02, PNorm = 144.0390, GNorm = 0.5419, lr_0 = 6.7289e-04
Loss = 7.0405e-02, PNorm = 144.1060, GNorm = 1.1241, lr_0 = 6.7243e-04
Loss = 7.1782e-02, PNorm = 144.1805, GNorm = 1.1381, lr_0 = 6.7197e-04
Loss = 7.8348e-02, PNorm = 144.2510, GNorm = 0.3430, lr_0 = 6.7151e-04
Loss = 6.8045e-02, PNorm = 144.3292, GNorm = 0.3984, lr_0 = 6.7105e-04
Loss = 6.6129e-02, PNorm = 144.4016, GNorm = 0.4169, lr_0 = 6.7059e-04
Loss = 6.5616e-02, PNorm = 144.4672, GNorm = 0.5850, lr_0 = 6.7013e-04
Loss = 6.8829e-02, PNorm = 144.5371, GNorm = 0.5348, lr_0 = 6.6967e-04
Loss = 7.2369e-02, PNorm = 144.6063, GNorm = 0.4741, lr_0 = 6.6921e-04
Loss = 7.2510e-02, PNorm = 144.6810, GNorm = 0.9334, lr_0 = 6.6876e-04
Loss = 7.1707e-02, PNorm = 144.7550, GNorm = 0.5850, lr_0 = 6.6830e-04
Loss = 6.8363e-02, PNorm = 144.8313, GNorm = 0.4687, lr_0 = 6.6784e-04
Loss = 6.1652e-02, PNorm = 144.9029, GNorm = 0.5745, lr_0 = 6.6738e-04
Loss = 7.0846e-02, PNorm = 144.9744, GNorm = 0.4224, lr_0 = 6.6693e-04
Loss = 6.1219e-02, PNorm = 145.0423, GNorm = 0.7928, lr_0 = 6.6647e-04
Loss = 5.7501e-02, PNorm = 145.1027, GNorm = 0.5427, lr_0 = 6.6601e-04
Loss = 7.3873e-02, PNorm = 145.1658, GNorm = 0.4800, lr_0 = 6.6556e-04
Loss = 7.3633e-02, PNorm = 145.2310, GNorm = 0.9038, lr_0 = 6.6510e-04
Loss = 6.2938e-02, PNorm = 145.2967, GNorm = 0.6783, lr_0 = 6.6464e-04
Loss = 6.9912e-02, PNorm = 145.3624, GNorm = 1.2876, lr_0 = 6.6419e-04
Loss = 7.1284e-02, PNorm = 145.4345, GNorm = 0.4266, lr_0 = 6.6373e-04
Loss = 8.1327e-02, PNorm = 145.5037, GNorm = 0.7640, lr_0 = 6.6328e-04
Loss = 6.6218e-02, PNorm = 145.5782, GNorm = 0.4974, lr_0 = 6.6282e-04
Validation mae = 0.123874
Epoch 7
Loss = 5.0777e-02, PNorm = 145.6422, GNorm = 0.3357, lr_0 = 6.6237e-04
Loss = 4.7331e-02, PNorm = 145.6941, GNorm = 0.4545, lr_0 = 6.6192e-04
Loss = 4.5452e-02, PNorm = 145.7443, GNorm = 0.4559, lr_0 = 6.6146e-04
Loss = 4.8013e-02, PNorm = 145.7834, GNorm = 0.4297, lr_0 = 6.6101e-04
Loss = 5.5876e-02, PNorm = 145.8300, GNorm = 0.6721, lr_0 = 6.6056e-04
Loss = 5.0129e-02, PNorm = 145.8762, GNorm = 0.3393, lr_0 = 6.6011e-04
Loss = 4.8025e-02, PNorm = 145.9168, GNorm = 0.8722, lr_0 = 6.5965e-04
Loss = 5.0726e-02, PNorm = 145.9601, GNorm = 0.2797, lr_0 = 6.5920e-04
Loss = 3.9919e-02, PNorm = 146.0017, GNorm = 0.3749, lr_0 = 6.5875e-04
Loss = 4.4964e-02, PNorm = 146.0371, GNorm = 0.3393, lr_0 = 6.5830e-04
Loss = 4.5614e-02, PNorm = 146.0778, GNorm = 0.3318, lr_0 = 6.5785e-04
Loss = 5.6946e-02, PNorm = 146.1221, GNorm = 1.1796, lr_0 = 6.5740e-04
Loss = 4.2400e-02, PNorm = 146.1654, GNorm = 0.5698, lr_0 = 6.5695e-04
Loss = 4.7916e-02, PNorm = 146.2107, GNorm = 0.2478, lr_0 = 6.5650e-04
Loss = 5.4557e-02, PNorm = 146.2512, GNorm = 0.3857, lr_0 = 6.5605e-04
Loss = 4.6748e-02, PNorm = 146.2961, GNorm = 0.3483, lr_0 = 6.5560e-04
Loss = 4.5476e-02, PNorm = 146.3364, GNorm = 0.8595, lr_0 = 6.5515e-04
Loss = 4.8782e-02, PNorm = 146.3779, GNorm = 0.3859, lr_0 = 6.5470e-04
Loss = 4.1697e-02, PNorm = 146.4192, GNorm = 0.4511, lr_0 = 6.5425e-04
Loss = 4.2163e-02, PNorm = 146.4601, GNorm = 0.3076, lr_0 = 6.5380e-04
Loss = 4.3115e-02, PNorm = 146.5073, GNorm = 0.3887, lr_0 = 6.5335e-04
Loss = 4.3221e-02, PNorm = 146.5489, GNorm = 0.7014, lr_0 = 6.5291e-04
Loss = 4.9266e-02, PNorm = 146.5975, GNorm = 0.6761, lr_0 = 6.5246e-04
Loss = 4.1927e-02, PNorm = 146.6439, GNorm = 0.2867, lr_0 = 6.5201e-04
Loss = 4.3314e-02, PNorm = 146.6878, GNorm = 0.3463, lr_0 = 6.5157e-04
Loss = 4.8062e-02, PNorm = 146.7306, GNorm = 0.4090, lr_0 = 6.5112e-04
Loss = 4.4828e-02, PNorm = 146.7791, GNorm = 0.4541, lr_0 = 6.5067e-04
Loss = 4.9601e-02, PNorm = 146.8286, GNorm = 0.5235, lr_0 = 6.5023e-04
Loss = 4.8649e-02, PNorm = 146.8753, GNorm = 0.6311, lr_0 = 6.4978e-04
Loss = 5.6683e-02, PNorm = 146.9276, GNorm = 0.5628, lr_0 = 6.4934e-04
Loss = 4.3028e-02, PNorm = 146.9762, GNorm = 0.2657, lr_0 = 6.4889e-04
Loss = 5.3430e-02, PNorm = 147.0237, GNorm = 0.6411, lr_0 = 6.4845e-04
Loss = 4.8835e-02, PNorm = 147.0667, GNorm = 0.4983, lr_0 = 6.4800e-04
Loss = 5.3169e-02, PNorm = 147.1047, GNorm = 0.3105, lr_0 = 6.4756e-04
Loss = 4.3390e-02, PNorm = 147.1518, GNorm = 0.7066, lr_0 = 6.4712e-04
Loss = 5.2529e-02, PNorm = 147.2031, GNorm = 0.8671, lr_0 = 6.4667e-04
Loss = 5.1545e-02, PNorm = 147.2562, GNorm = 0.8297, lr_0 = 6.4623e-04
Loss = 5.9647e-02, PNorm = 147.3116, GNorm = 1.1020, lr_0 = 6.4579e-04
Loss = 4.6304e-02, PNorm = 147.3654, GNorm = 0.5615, lr_0 = 6.4534e-04
Loss = 4.2726e-02, PNorm = 147.4191, GNorm = 0.3008, lr_0 = 6.4490e-04
Loss = 4.3720e-02, PNorm = 147.4641, GNorm = 0.6393, lr_0 = 6.4446e-04
Loss = 3.8525e-02, PNorm = 147.5082, GNorm = 0.9804, lr_0 = 6.4402e-04
Loss = 4.6915e-02, PNorm = 147.5561, GNorm = 0.5298, lr_0 = 6.4358e-04
Loss = 5.4585e-02, PNorm = 147.5998, GNorm = 0.4402, lr_0 = 6.4314e-04
Loss = 5.5264e-02, PNorm = 147.6497, GNorm = 0.3881, lr_0 = 6.4270e-04
Loss = 5.1513e-02, PNorm = 147.7032, GNorm = 0.4175, lr_0 = 6.4226e-04
Loss = 5.5492e-02, PNorm = 147.7555, GNorm = 0.6960, lr_0 = 6.4182e-04
Loss = 4.8488e-02, PNorm = 147.8030, GNorm = 0.7670, lr_0 = 6.4138e-04
Loss = 4.4438e-02, PNorm = 147.8541, GNorm = 0.4569, lr_0 = 6.4094e-04
Loss = 4.4253e-02, PNorm = 147.9003, GNorm = 0.4088, lr_0 = 6.4050e-04
Loss = 4.5066e-02, PNorm = 147.9475, GNorm = 0.8245, lr_0 = 6.4006e-04
Loss = 4.9726e-02, PNorm = 147.9929, GNorm = 0.4771, lr_0 = 6.3962e-04
Loss = 5.6277e-02, PNorm = 148.0471, GNorm = 0.6458, lr_0 = 6.3918e-04
Loss = 5.3715e-02, PNorm = 148.1030, GNorm = 0.5742, lr_0 = 6.3874e-04
Loss = 5.0113e-02, PNorm = 148.1648, GNorm = 0.6384, lr_0 = 6.3831e-04
Loss = 4.1041e-02, PNorm = 148.2129, GNorm = 0.6569, lr_0 = 6.3787e-04
Loss = 7.7337e-02, PNorm = 148.2699, GNorm = 0.6053, lr_0 = 6.3743e-04
Loss = 4.8467e-02, PNorm = 148.3204, GNorm = 0.4888, lr_0 = 6.3700e-04
Loss = 4.8138e-02, PNorm = 148.3756, GNorm = 0.8056, lr_0 = 6.3656e-04
Loss = 5.8168e-02, PNorm = 148.4323, GNorm = 0.9089, lr_0 = 6.3612e-04
Loss = 5.9834e-02, PNorm = 148.4890, GNorm = 0.4474, lr_0 = 6.3569e-04
Loss = 4.4005e-02, PNorm = 148.5474, GNorm = 0.4663, lr_0 = 6.3525e-04
Loss = 4.6705e-02, PNorm = 148.5966, GNorm = 0.3836, lr_0 = 6.3482e-04
Loss = 4.8714e-02, PNorm = 148.6564, GNorm = 0.4906, lr_0 = 6.3438e-04
Loss = 5.6663e-02, PNorm = 148.7104, GNorm = 0.3972, lr_0 = 6.3395e-04
Loss = 6.2446e-02, PNorm = 148.7685, GNorm = 0.4034, lr_0 = 6.3351e-04
Loss = 4.3159e-02, PNorm = 148.8291, GNorm = 0.3353, lr_0 = 6.3308e-04
Loss = 5.4752e-02, PNorm = 148.8781, GNorm = 0.7252, lr_0 = 6.3265e-04
Loss = 6.6989e-02, PNorm = 148.9295, GNorm = 0.4286, lr_0 = 6.3221e-04
Loss = 4.9288e-02, PNorm = 148.9815, GNorm = 0.3917, lr_0 = 6.3178e-04
Loss = 5.1458e-02, PNorm = 149.0375, GNorm = 0.6554, lr_0 = 6.3135e-04
Loss = 4.8764e-02, PNorm = 149.0969, GNorm = 0.6713, lr_0 = 6.3091e-04
Loss = 4.6301e-02, PNorm = 149.1532, GNorm = 0.6990, lr_0 = 6.3048e-04
Loss = 6.3134e-02, PNorm = 149.2121, GNorm = 0.6344, lr_0 = 6.3005e-04
Loss = 5.8614e-02, PNorm = 149.2600, GNorm = 0.6617, lr_0 = 6.2962e-04
Loss = 5.2162e-02, PNorm = 149.3165, GNorm = 0.4520, lr_0 = 6.2919e-04
Loss = 5.8463e-02, PNorm = 149.3771, GNorm = 0.4199, lr_0 = 6.2876e-04
Loss = 5.2860e-02, PNorm = 149.4410, GNorm = 0.5981, lr_0 = 6.2833e-04
Loss = 4.5433e-02, PNorm = 149.5009, GNorm = 0.4202, lr_0 = 6.2789e-04
Loss = 5.4666e-02, PNorm = 149.5629, GNorm = 0.7711, lr_0 = 6.2746e-04
Loss = 4.4567e-02, PNorm = 149.6223, GNorm = 0.3523, lr_0 = 6.2703e-04
Loss = 4.8927e-02, PNorm = 149.6817, GNorm = 0.4682, lr_0 = 6.2661e-04
Loss = 4.3317e-02, PNorm = 149.7343, GNorm = 0.5957, lr_0 = 6.2618e-04
Loss = 5.6152e-02, PNorm = 149.7843, GNorm = 0.3775, lr_0 = 6.2575e-04
Loss = 4.6662e-02, PNorm = 149.8427, GNorm = 0.3257, lr_0 = 6.2532e-04
Loss = 5.3077e-02, PNorm = 149.8994, GNorm = 1.0003, lr_0 = 6.2489e-04
Loss = 4.6036e-02, PNorm = 149.9635, GNorm = 0.5222, lr_0 = 6.2446e-04
Loss = 4.7613e-02, PNorm = 150.0251, GNorm = 0.3989, lr_0 = 6.2403e-04
Loss = 5.7475e-02, PNorm = 150.0781, GNorm = 2.3471, lr_0 = 6.2361e-04
Loss = 4.4075e-02, PNorm = 150.1329, GNorm = 0.4884, lr_0 = 6.2318e-04
Loss = 6.0311e-02, PNorm = 150.1908, GNorm = 0.4807, lr_0 = 6.2275e-04
Loss = 4.6783e-02, PNorm = 150.2541, GNorm = 0.5968, lr_0 = 6.2233e-04
Loss = 5.1175e-02, PNorm = 150.3165, GNorm = 0.5642, lr_0 = 6.2190e-04
Loss = 5.5710e-02, PNorm = 150.3788, GNorm = 0.7827, lr_0 = 6.2147e-04
Loss = 5.3898e-02, PNorm = 150.4508, GNorm = 1.4433, lr_0 = 6.2105e-04
Loss = 5.0365e-02, PNorm = 150.5146, GNorm = 0.6480, lr_0 = 6.2062e-04
Loss = 5.1972e-02, PNorm = 150.5735, GNorm = 0.3990, lr_0 = 6.2020e-04
Loss = 4.7476e-02, PNorm = 150.6283, GNorm = 0.6039, lr_0 = 6.1977e-04
Loss = 5.0577e-02, PNorm = 150.6868, GNorm = 0.8041, lr_0 = 6.1935e-04
Loss = 5.2444e-02, PNorm = 150.7494, GNorm = 0.7959, lr_0 = 6.1892e-04
Loss = 4.2786e-02, PNorm = 150.8113, GNorm = 0.2725, lr_0 = 6.1850e-04
Loss = 6.3054e-02, PNorm = 150.8703, GNorm = 0.4321, lr_0 = 6.1808e-04
Loss = 4.2229e-02, PNorm = 150.9191, GNorm = 0.6545, lr_0 = 6.1765e-04
Loss = 4.7373e-02, PNorm = 150.9703, GNorm = 0.5739, lr_0 = 6.1723e-04
Loss = 5.2681e-02, PNorm = 151.0284, GNorm = 0.5580, lr_0 = 6.1681e-04
Loss = 4.3259e-02, PNorm = 151.0882, GNorm = 0.3295, lr_0 = 6.1638e-04
Loss = 4.8442e-02, PNorm = 151.1513, GNorm = 0.4183, lr_0 = 6.1596e-04
Loss = 5.0240e-02, PNorm = 151.2035, GNorm = 0.5060, lr_0 = 6.1554e-04
Loss = 4.1631e-02, PNorm = 151.2581, GNorm = 0.5021, lr_0 = 6.1512e-04
Loss = 4.6097e-02, PNorm = 151.3103, GNorm = 0.3738, lr_0 = 6.1470e-04
Loss = 5.5465e-02, PNorm = 151.3652, GNorm = 0.3209, lr_0 = 6.1428e-04
Loss = 5.7569e-02, PNorm = 151.4223, GNorm = 0.6052, lr_0 = 6.1385e-04
Loss = 5.9041e-02, PNorm = 151.4877, GNorm = 1.0914, lr_0 = 6.1343e-04
Loss = 4.3222e-02, PNorm = 151.5486, GNorm = 0.6182, lr_0 = 6.1301e-04
Loss = 5.1733e-02, PNorm = 151.6123, GNorm = 0.5698, lr_0 = 6.1259e-04
Loss = 5.2806e-02, PNorm = 151.6706, GNorm = 0.2822, lr_0 = 6.1217e-04
Loss = 4.9218e-02, PNorm = 151.7279, GNorm = 1.1454, lr_0 = 6.1175e-04
Loss = 6.2240e-02, PNorm = 151.7885, GNorm = 0.5438, lr_0 = 6.1134e-04
Loss = 4.7106e-02, PNorm = 151.8476, GNorm = 0.5273, lr_0 = 6.1092e-04
Loss = 4.7629e-02, PNorm = 151.9076, GNorm = 0.6889, lr_0 = 6.1050e-04
Validation mae = 0.123164
Epoch 8
Loss = 4.0078e-02, PNorm = 151.9588, GNorm = 0.4560, lr_0 = 6.1008e-04
Loss = 4.3307e-02, PNorm = 152.0031, GNorm = 0.2879, lr_0 = 6.0966e-04
Loss = 4.4031e-02, PNorm = 152.0370, GNorm = 0.4576, lr_0 = 6.0924e-04
Loss = 4.6685e-02, PNorm = 152.0783, GNorm = 0.4044, lr_0 = 6.0883e-04
Loss = 3.6408e-02, PNorm = 152.1196, GNorm = 0.3159, lr_0 = 6.0841e-04
Loss = 3.9624e-02, PNorm = 152.1589, GNorm = 0.3570, lr_0 = 6.0799e-04
Loss = 3.5120e-02, PNorm = 152.2000, GNorm = 0.3245, lr_0 = 6.0758e-04
Loss = 4.1485e-02, PNorm = 152.2355, GNorm = 0.3512, lr_0 = 6.0716e-04
Loss = 3.9917e-02, PNorm = 152.2744, GNorm = 0.4399, lr_0 = 6.0674e-04
Loss = 3.6773e-02, PNorm = 152.3087, GNorm = 0.4429, lr_0 = 6.0633e-04
Loss = 3.9325e-02, PNorm = 152.3445, GNorm = 0.5117, lr_0 = 6.0591e-04
Loss = 4.1864e-02, PNorm = 152.3849, GNorm = 0.2905, lr_0 = 6.0550e-04
Loss = 4.1882e-02, PNorm = 152.4283, GNorm = 0.2130, lr_0 = 6.0508e-04
Loss = 3.7686e-02, PNorm = 152.4590, GNorm = 0.3949, lr_0 = 6.0467e-04
Loss = 3.2506e-02, PNorm = 152.4955, GNorm = 0.6616, lr_0 = 6.0425e-04
Loss = 3.2857e-02, PNorm = 152.5297, GNorm = 0.2384, lr_0 = 6.0384e-04
Loss = 4.0351e-02, PNorm = 152.5656, GNorm = 0.7318, lr_0 = 6.0343e-04
Loss = 4.4189e-02, PNorm = 152.6055, GNorm = 0.7959, lr_0 = 6.0301e-04
Loss = 4.5530e-02, PNorm = 152.6425, GNorm = 0.6541, lr_0 = 6.0260e-04
Loss = 4.1070e-02, PNorm = 152.6878, GNorm = 0.2645, lr_0 = 6.0219e-04
Loss = 3.4283e-02, PNorm = 152.7305, GNorm = 0.3164, lr_0 = 6.0178e-04
Loss = 4.1339e-02, PNorm = 152.7657, GNorm = 0.4683, lr_0 = 6.0136e-04
Loss = 3.6626e-02, PNorm = 152.8081, GNorm = 0.4874, lr_0 = 6.0095e-04
Loss = 3.8685e-02, PNorm = 152.8451, GNorm = 0.7319, lr_0 = 6.0054e-04
Loss = 3.2372e-02, PNorm = 152.8849, GNorm = 0.4748, lr_0 = 6.0013e-04
Loss = 4.2023e-02, PNorm = 152.9261, GNorm = 0.6284, lr_0 = 5.9972e-04
Loss = 3.7203e-02, PNorm = 152.9635, GNorm = 0.4503, lr_0 = 5.9931e-04
Loss = 3.5155e-02, PNorm = 153.0077, GNorm = 0.3664, lr_0 = 5.9890e-04
Loss = 4.1432e-02, PNorm = 153.0410, GNorm = 0.5279, lr_0 = 5.9849e-04
Loss = 3.8137e-02, PNorm = 153.0832, GNorm = 0.3036, lr_0 = 5.9808e-04
Loss = 3.6274e-02, PNorm = 153.1263, GNorm = 0.4246, lr_0 = 5.9767e-04
Loss = 4.2522e-02, PNorm = 153.1655, GNorm = 0.3300, lr_0 = 5.9726e-04
Loss = 3.4487e-02, PNorm = 153.2090, GNorm = 0.8257, lr_0 = 5.9685e-04
Loss = 2.9981e-02, PNorm = 153.2474, GNorm = 0.2991, lr_0 = 5.9644e-04
Loss = 3.5434e-02, PNorm = 153.2881, GNorm = 0.4800, lr_0 = 5.9603e-04
Loss = 3.8610e-02, PNorm = 153.3317, GNorm = 0.5063, lr_0 = 5.9562e-04
Loss = 3.5172e-02, PNorm = 153.3719, GNorm = 0.5248, lr_0 = 5.9521e-04
Loss = 3.6476e-02, PNorm = 153.4117, GNorm = 0.4175, lr_0 = 5.9481e-04
Loss = 3.8059e-02, PNorm = 153.4493, GNorm = 0.5622, lr_0 = 5.9440e-04
Loss = 3.6751e-02, PNorm = 153.4811, GNorm = 0.2938, lr_0 = 5.9399e-04
Loss = 4.2201e-02, PNorm = 153.5229, GNorm = 0.5092, lr_0 = 5.9358e-04
Loss = 3.7993e-02, PNorm = 153.5666, GNorm = 0.2814, lr_0 = 5.9318e-04
Loss = 3.8912e-02, PNorm = 153.6114, GNorm = 0.6340, lr_0 = 5.9277e-04
Loss = 4.9985e-02, PNorm = 153.6564, GNorm = 0.6277, lr_0 = 5.9236e-04
Loss = 3.1886e-02, PNorm = 153.7022, GNorm = 0.3158, lr_0 = 5.9196e-04
Loss = 3.4852e-02, PNorm = 153.7438, GNorm = 0.4031, lr_0 = 5.9155e-04
Loss = 3.2039e-02, PNorm = 153.7827, GNorm = 0.3808, lr_0 = 5.9115e-04
Loss = 3.5952e-02, PNorm = 153.8231, GNorm = 0.2739, lr_0 = 5.9074e-04
Loss = 4.1988e-02, PNorm = 153.8637, GNorm = 0.5940, lr_0 = 5.9034e-04
Loss = 3.6011e-02, PNorm = 153.9008, GNorm = 0.5262, lr_0 = 5.8993e-04
Loss = 4.1824e-02, PNorm = 153.9436, GNorm = 0.5699, lr_0 = 5.8953e-04
Loss = 3.7197e-02, PNorm = 153.9908, GNorm = 0.5556, lr_0 = 5.8913e-04
Loss = 4.3149e-02, PNorm = 154.0369, GNorm = 0.3461, lr_0 = 5.8872e-04
Loss = 3.5299e-02, PNorm = 154.0782, GNorm = 0.6728, lr_0 = 5.8832e-04
Loss = 3.9584e-02, PNorm = 154.1202, GNorm = 0.3117, lr_0 = 5.8792e-04
Loss = 4.1954e-02, PNorm = 154.1631, GNorm = 0.8475, lr_0 = 5.8751e-04
Loss = 3.2188e-02, PNorm = 154.2070, GNorm = 0.2489, lr_0 = 5.8711e-04
Loss = 3.9235e-02, PNorm = 154.2556, GNorm = 0.5815, lr_0 = 5.8671e-04
Loss = 3.3980e-02, PNorm = 154.3049, GNorm = 0.3472, lr_0 = 5.8631e-04
Loss = 3.8121e-02, PNorm = 154.3554, GNorm = 0.3117, lr_0 = 5.8591e-04
Loss = 4.2466e-02, PNorm = 154.4022, GNorm = 0.5870, lr_0 = 5.8550e-04
Loss = 4.0013e-02, PNorm = 154.4474, GNorm = 0.3676, lr_0 = 5.8510e-04
Loss = 4.0311e-02, PNorm = 154.4908, GNorm = 0.6511, lr_0 = 5.8470e-04
Loss = 5.6300e-02, PNorm = 154.5393, GNorm = 0.6456, lr_0 = 5.8430e-04
Loss = 3.7918e-02, PNorm = 154.5887, GNorm = 0.4410, lr_0 = 5.8390e-04
Loss = 3.7316e-02, PNorm = 154.6361, GNorm = 0.3288, lr_0 = 5.8350e-04
Loss = 3.8278e-02, PNorm = 154.6856, GNorm = 0.2948, lr_0 = 5.8310e-04
Loss = 3.6637e-02, PNorm = 154.7268, GNorm = 0.4141, lr_0 = 5.8270e-04
Loss = 4.6521e-02, PNorm = 154.7700, GNorm = 0.4289, lr_0 = 5.8230e-04
Loss = 3.9599e-02, PNorm = 154.8125, GNorm = 0.4283, lr_0 = 5.8190e-04
Loss = 4.0295e-02, PNorm = 154.8560, GNorm = 0.5485, lr_0 = 5.8151e-04
Loss = 3.3005e-02, PNorm = 154.9040, GNorm = 0.4923, lr_0 = 5.8111e-04
Loss = 4.3759e-02, PNorm = 154.9500, GNorm = 0.3868, lr_0 = 5.8071e-04
Loss = 5.3306e-02, PNorm = 154.9985, GNorm = 0.5305, lr_0 = 5.8031e-04
Loss = 3.8490e-02, PNorm = 155.0442, GNorm = 0.3835, lr_0 = 5.7991e-04
Loss = 3.8199e-02, PNorm = 155.0906, GNorm = 0.6220, lr_0 = 5.7952e-04
Loss = 4.9655e-02, PNorm = 155.1400, GNorm = 0.2942, lr_0 = 5.7912e-04
Loss = 4.0676e-02, PNorm = 155.1917, GNorm = 0.4028, lr_0 = 5.7872e-04
Loss = 3.7526e-02, PNorm = 155.2438, GNorm = 0.3669, lr_0 = 5.7833e-04
Loss = 4.4997e-02, PNorm = 155.2953, GNorm = 0.3560, lr_0 = 5.7793e-04
Loss = 4.2422e-02, PNorm = 155.3456, GNorm = 0.4551, lr_0 = 5.7753e-04
Loss = 3.9485e-02, PNorm = 155.3986, GNorm = 0.6987, lr_0 = 5.7714e-04
Loss = 3.3908e-02, PNorm = 155.4488, GNorm = 0.3128, lr_0 = 5.7674e-04
Loss = 4.3730e-02, PNorm = 155.4954, GNorm = 0.4832, lr_0 = 5.7635e-04
Loss = 4.0374e-02, PNorm = 155.5369, GNorm = 0.3996, lr_0 = 5.7595e-04
Loss = 3.7426e-02, PNorm = 155.5749, GNorm = 0.5013, lr_0 = 5.7556e-04
Loss = 5.5556e-02, PNorm = 155.6197, GNorm = 0.2732, lr_0 = 5.7516e-04
Loss = 4.2818e-02, PNorm = 155.6693, GNorm = 0.3657, lr_0 = 5.7477e-04
Loss = 4.5626e-02, PNorm = 155.7157, GNorm = 0.6494, lr_0 = 5.7438e-04
Loss = 4.0039e-02, PNorm = 155.7647, GNorm = 0.4929, lr_0 = 5.7398e-04
Loss = 4.6871e-02, PNorm = 155.8081, GNorm = 0.4804, lr_0 = 5.7359e-04
Loss = 4.5425e-02, PNorm = 155.8567, GNorm = 0.5579, lr_0 = 5.7320e-04
Loss = 3.6164e-02, PNorm = 155.9105, GNorm = 0.3056, lr_0 = 5.7280e-04
Loss = 4.8922e-02, PNorm = 155.9641, GNorm = 0.4245, lr_0 = 5.7241e-04
Loss = 4.0268e-02, PNorm = 156.0154, GNorm = 0.4408, lr_0 = 5.7202e-04
Loss = 4.5479e-02, PNorm = 156.0635, GNorm = 0.7679, lr_0 = 5.7163e-04
Loss = 4.2287e-02, PNorm = 156.1152, GNorm = 0.2714, lr_0 = 5.7124e-04
Loss = 4.2075e-02, PNorm = 156.1704, GNorm = 0.7039, lr_0 = 5.7084e-04
Loss = 4.5108e-02, PNorm = 156.2280, GNorm = 0.3983, lr_0 = 5.7045e-04
Loss = 4.7349e-02, PNorm = 156.2781, GNorm = 0.4755, lr_0 = 5.7006e-04
Loss = 4.5766e-02, PNorm = 156.3323, GNorm = 0.4213, lr_0 = 5.6967e-04
Loss = 4.5290e-02, PNorm = 156.3864, GNorm = 0.2149, lr_0 = 5.6928e-04
Loss = 4.9564e-02, PNorm = 156.4420, GNorm = 0.4181, lr_0 = 5.6889e-04
Loss = 3.5392e-02, PNorm = 156.4931, GNorm = 0.4442, lr_0 = 5.6850e-04
Loss = 4.8038e-02, PNorm = 156.5435, GNorm = 0.3267, lr_0 = 5.6811e-04
Loss = 4.3019e-02, PNorm = 156.5972, GNorm = 0.2521, lr_0 = 5.6772e-04
Loss = 3.4719e-02, PNorm = 156.6484, GNorm = 0.4207, lr_0 = 5.6733e-04
Loss = 3.2829e-02, PNorm = 156.6992, GNorm = 0.4325, lr_0 = 5.6695e-04
Loss = 4.1678e-02, PNorm = 156.7459, GNorm = 0.5437, lr_0 = 5.6656e-04
Loss = 4.4831e-02, PNorm = 156.7933, GNorm = 0.3528, lr_0 = 5.6617e-04
Loss = 4.4093e-02, PNorm = 156.8457, GNorm = 0.2585, lr_0 = 5.6578e-04
Loss = 3.4604e-02, PNorm = 156.9008, GNorm = 0.5757, lr_0 = 5.6539e-04
Loss = 4.3000e-02, PNorm = 156.9532, GNorm = 0.5230, lr_0 = 5.6501e-04
Loss = 4.1431e-02, PNorm = 157.0111, GNorm = 0.4017, lr_0 = 5.6462e-04
Loss = 5.2379e-02, PNorm = 157.0736, GNorm = 1.2791, lr_0 = 5.6423e-04
Loss = 4.0012e-02, PNorm = 157.1287, GNorm = 0.4220, lr_0 = 5.6385e-04
Loss = 4.7022e-02, PNorm = 157.1850, GNorm = 0.5166, lr_0 = 5.6346e-04
Loss = 4.3961e-02, PNorm = 157.2395, GNorm = 0.4055, lr_0 = 5.6307e-04
Loss = 4.9723e-02, PNorm = 157.2889, GNorm = 0.3178, lr_0 = 5.6269e-04
Loss = 4.2142e-02, PNorm = 157.3339, GNorm = 0.3513, lr_0 = 5.6230e-04
Validation mae = 0.122135
Epoch 9
Loss = 3.8028e-02, PNorm = 157.3778, GNorm = 0.3277, lr_0 = 5.6192e-04
Loss = 4.3620e-02, PNorm = 157.4093, GNorm = 1.0668, lr_0 = 5.6153e-04
Loss = 2.8245e-02, PNorm = 157.4405, GNorm = 0.5549, lr_0 = 5.6115e-04
Loss = 3.7034e-02, PNorm = 157.4757, GNorm = 0.3256, lr_0 = 5.6076e-04
Loss = 3.3614e-02, PNorm = 157.5101, GNorm = 0.5539, lr_0 = 5.6038e-04
Loss = 3.5943e-02, PNorm = 157.5452, GNorm = 0.7045, lr_0 = 5.6000e-04
Loss = 3.5385e-02, PNorm = 157.5812, GNorm = 0.3060, lr_0 = 5.5961e-04
Loss = 3.9782e-02, PNorm = 157.6206, GNorm = 0.4216, lr_0 = 5.5923e-04
Loss = 3.1016e-02, PNorm = 157.6529, GNorm = 0.4230, lr_0 = 5.5885e-04
Loss = 3.1129e-02, PNorm = 157.6837, GNorm = 0.3385, lr_0 = 5.5846e-04
Loss = 2.9922e-02, PNorm = 157.7157, GNorm = 0.5272, lr_0 = 5.5808e-04
Loss = 4.0533e-02, PNorm = 157.7446, GNorm = 0.5571, lr_0 = 5.5770e-04
Loss = 3.4722e-02, PNorm = 157.7782, GNorm = 0.3609, lr_0 = 5.5732e-04
Loss = 2.4533e-02, PNorm = 157.8082, GNorm = 0.5248, lr_0 = 5.5693e-04
Loss = 3.2124e-02, PNorm = 157.8356, GNorm = 0.4238, lr_0 = 5.5655e-04
Loss = 3.9904e-02, PNorm = 157.8714, GNorm = 0.5314, lr_0 = 5.5617e-04
Loss = 2.8778e-02, PNorm = 157.9058, GNorm = 0.6720, lr_0 = 5.5579e-04
Loss = 3.2739e-02, PNorm = 157.9408, GNorm = 0.2691, lr_0 = 5.5541e-04
Loss = 3.0515e-02, PNorm = 157.9782, GNorm = 0.4460, lr_0 = 5.5503e-04
Loss = 2.9887e-02, PNorm = 158.0138, GNorm = 0.4206, lr_0 = 5.5465e-04
Loss = 2.9187e-02, PNorm = 158.0542, GNorm = 0.3106, lr_0 = 5.5427e-04
Loss = 2.6164e-02, PNorm = 158.0893, GNorm = 0.2804, lr_0 = 5.5389e-04
Loss = 3.4684e-02, PNorm = 158.1242, GNorm = 0.7005, lr_0 = 5.5351e-04
Loss = 3.7191e-02, PNorm = 158.1589, GNorm = 0.3799, lr_0 = 5.5313e-04
Loss = 3.4839e-02, PNorm = 158.1940, GNorm = 0.4259, lr_0 = 5.5275e-04
Loss = 3.0532e-02, PNorm = 158.2276, GNorm = 0.3228, lr_0 = 5.5237e-04
Loss = 3.0548e-02, PNorm = 158.2620, GNorm = 0.3670, lr_0 = 5.5199e-04
Loss = 3.6810e-02, PNorm = 158.2977, GNorm = 0.6043, lr_0 = 5.5162e-04
Loss = 2.7327e-02, PNorm = 158.3343, GNorm = 0.3088, lr_0 = 5.5124e-04
Loss = 2.9459e-02, PNorm = 158.3660, GNorm = 0.6010, lr_0 = 5.5086e-04
Loss = 2.8285e-02, PNorm = 158.3981, GNorm = 0.3122, lr_0 = 5.5048e-04
Loss = 3.0679e-02, PNorm = 158.4345, GNorm = 0.1940, lr_0 = 5.5011e-04
Loss = 3.8119e-02, PNorm = 158.4701, GNorm = 0.5820, lr_0 = 5.4973e-04
Loss = 3.1638e-02, PNorm = 158.5008, GNorm = 0.4016, lr_0 = 5.4935e-04
Loss = 4.5138e-02, PNorm = 158.5386, GNorm = 1.2759, lr_0 = 5.4898e-04
Loss = 3.2833e-02, PNorm = 158.5768, GNorm = 0.3937, lr_0 = 5.4860e-04
Loss = 3.1869e-02, PNorm = 158.6191, GNorm = 0.3041, lr_0 = 5.4822e-04
Loss = 2.6137e-02, PNorm = 158.6560, GNorm = 0.7007, lr_0 = 5.4785e-04
Loss = 3.3424e-02, PNorm = 158.6925, GNorm = 0.7941, lr_0 = 5.4747e-04
Loss = 3.2590e-02, PNorm = 158.7288, GNorm = 0.6419, lr_0 = 5.4710e-04
Loss = 3.5579e-02, PNorm = 158.7698, GNorm = 0.5568, lr_0 = 5.4672e-04
Loss = 2.8625e-02, PNorm = 158.8073, GNorm = 0.6666, lr_0 = 5.4635e-04
Loss = 3.0422e-02, PNorm = 158.8407, GNorm = 0.4031, lr_0 = 5.4597e-04
Loss = 2.8457e-02, PNorm = 158.8724, GNorm = 0.2485, lr_0 = 5.4560e-04
Loss = 3.0762e-02, PNorm = 158.9037, GNorm = 0.8024, lr_0 = 5.4523e-04
Loss = 3.2018e-02, PNorm = 158.9485, GNorm = 0.2827, lr_0 = 5.4485e-04
Loss = 2.9132e-02, PNorm = 158.9897, GNorm = 0.3757, lr_0 = 5.4448e-04
Loss = 3.6837e-02, PNorm = 159.0306, GNorm = 0.3004, lr_0 = 5.4411e-04
Loss = 3.2436e-02, PNorm = 159.0705, GNorm = 0.3365, lr_0 = 5.4373e-04
Loss = 2.4340e-02, PNorm = 159.1051, GNorm = 0.3145, lr_0 = 5.4336e-04
Loss = 4.1792e-02, PNorm = 159.1383, GNorm = 0.6517, lr_0 = 5.4299e-04
Loss = 2.8134e-02, PNorm = 159.1753, GNorm = 0.4575, lr_0 = 5.4262e-04
Loss = 3.0023e-02, PNorm = 159.2103, GNorm = 0.5864, lr_0 = 5.4225e-04
Loss = 2.7107e-02, PNorm = 159.2426, GNorm = 0.6385, lr_0 = 5.4187e-04
Loss = 5.5169e-02, PNorm = 159.2850, GNorm = 0.7528, lr_0 = 5.4150e-04
Loss = 3.7529e-02, PNorm = 159.3270, GNorm = 0.3844, lr_0 = 5.4113e-04
Loss = 3.1980e-02, PNorm = 159.3667, GNorm = 0.8914, lr_0 = 5.4076e-04
Loss = 3.2590e-02, PNorm = 159.4051, GNorm = 0.3019, lr_0 = 5.4039e-04
Loss = 3.0449e-02, PNorm = 159.4451, GNorm = 0.3906, lr_0 = 5.4002e-04
Loss = 2.8311e-02, PNorm = 159.4830, GNorm = 0.4034, lr_0 = 5.3965e-04
Loss = 3.3134e-02, PNorm = 159.5229, GNorm = 0.4700, lr_0 = 5.3928e-04
Loss = 2.6424e-02, PNorm = 159.5571, GNorm = 0.4868, lr_0 = 5.3891e-04
Loss = 3.3660e-02, PNorm = 159.5896, GNorm = 0.2337, lr_0 = 5.3854e-04
Loss = 3.7841e-02, PNorm = 159.6268, GNorm = 0.5690, lr_0 = 5.3817e-04
Loss = 3.3053e-02, PNorm = 159.6636, GNorm = 0.4960, lr_0 = 5.3781e-04
Loss = 2.6130e-02, PNorm = 159.7047, GNorm = 0.3847, lr_0 = 5.3744e-04
Loss = 3.6869e-02, PNorm = 159.7453, GNorm = 0.2269, lr_0 = 5.3707e-04
Loss = 3.4628e-02, PNorm = 159.7859, GNorm = 0.5173, lr_0 = 5.3670e-04
Loss = 3.5270e-02, PNorm = 159.8259, GNorm = 0.3144, lr_0 = 5.3633e-04
Loss = 2.9131e-02, PNorm = 159.8679, GNorm = 0.2318, lr_0 = 5.3597e-04
Loss = 3.6275e-02, PNorm = 159.9072, GNorm = 0.5632, lr_0 = 5.3560e-04
Loss = 3.6099e-02, PNorm = 159.9472, GNorm = 1.5965, lr_0 = 5.3523e-04
Loss = 2.8698e-02, PNorm = 159.9919, GNorm = 0.3932, lr_0 = 5.3486e-04
Loss = 3.8130e-02, PNorm = 160.0311, GNorm = 0.2785, lr_0 = 5.3450e-04
Loss = 3.2122e-02, PNorm = 160.0787, GNorm = 0.5295, lr_0 = 5.3413e-04
Loss = 3.1065e-02, PNorm = 160.1226, GNorm = 0.5589, lr_0 = 5.3377e-04
Loss = 3.5877e-02, PNorm = 160.1680, GNorm = 0.2861, lr_0 = 5.3340e-04
Loss = 3.6222e-02, PNorm = 160.2129, GNorm = 0.5018, lr_0 = 5.3304e-04
Loss = 3.3959e-02, PNorm = 160.2582, GNorm = 0.2826, lr_0 = 5.3267e-04
Loss = 3.9389e-02, PNorm = 160.3061, GNorm = 0.6203, lr_0 = 5.3231e-04
Loss = 3.3421e-02, PNorm = 160.3491, GNorm = 0.3349, lr_0 = 5.3194e-04
Loss = 3.5593e-02, PNorm = 160.3901, GNorm = 0.4249, lr_0 = 5.3158e-04
Loss = 2.9588e-02, PNorm = 160.4331, GNorm = 0.5370, lr_0 = 5.3121e-04
Loss = 4.0663e-02, PNorm = 160.4725, GNorm = 0.5730, lr_0 = 5.3085e-04
Loss = 3.8712e-02, PNorm = 160.5164, GNorm = 0.5453, lr_0 = 5.3048e-04
Loss = 4.2015e-02, PNorm = 160.5607, GNorm = 0.3112, lr_0 = 5.3012e-04
Loss = 3.6507e-02, PNorm = 160.6030, GNorm = 0.2652, lr_0 = 5.2976e-04
Loss = 3.5625e-02, PNorm = 160.6445, GNorm = 0.3550, lr_0 = 5.2939e-04
Loss = 3.0000e-02, PNorm = 160.6802, GNorm = 0.4371, lr_0 = 5.2903e-04
Loss = 2.9604e-02, PNorm = 160.7137, GNorm = 0.3363, lr_0 = 5.2867e-04
Loss = 3.9922e-02, PNorm = 160.7522, GNorm = 0.3444, lr_0 = 5.2831e-04
Loss = 3.4764e-02, PNorm = 160.7886, GNorm = 0.4166, lr_0 = 5.2795e-04
Loss = 4.2443e-02, PNorm = 160.8303, GNorm = 0.2196, lr_0 = 5.2758e-04
Loss = 3.0009e-02, PNorm = 160.8730, GNorm = 0.3654, lr_0 = 5.2722e-04
Loss = 2.8391e-02, PNorm = 160.9119, GNorm = 0.3862, lr_0 = 5.2686e-04
Loss = 3.3231e-02, PNorm = 160.9499, GNorm = 0.4432, lr_0 = 5.2650e-04
Loss = 3.3617e-02, PNorm = 160.9915, GNorm = 0.4475, lr_0 = 5.2614e-04
Loss = 3.6546e-02, PNorm = 161.0355, GNorm = 0.4332, lr_0 = 5.2578e-04
Loss = 3.4007e-02, PNorm = 161.0819, GNorm = 0.2158, lr_0 = 5.2542e-04
Loss = 3.0176e-02, PNorm = 161.1292, GNorm = 0.6605, lr_0 = 5.2506e-04
Loss = 2.9107e-02, PNorm = 161.1717, GNorm = 0.3211, lr_0 = 5.2470e-04
Loss = 3.8332e-02, PNorm = 161.2128, GNorm = 0.6987, lr_0 = 5.2434e-04
Loss = 3.8672e-02, PNorm = 161.2627, GNorm = 0.5578, lr_0 = 5.2398e-04
Loss = 2.8162e-02, PNorm = 161.3119, GNorm = 0.3092, lr_0 = 5.2362e-04
Loss = 2.5529e-02, PNorm = 161.3518, GNorm = 0.2295, lr_0 = 5.2326e-04
Loss = 3.4756e-02, PNorm = 161.3878, GNorm = 0.2012, lr_0 = 5.2290e-04
Loss = 3.8495e-02, PNorm = 161.4250, GNorm = 0.4255, lr_0 = 5.2255e-04
Loss = 3.9016e-02, PNorm = 161.4648, GNorm = 0.2103, lr_0 = 5.2219e-04
Loss = 3.0224e-02, PNorm = 161.5061, GNorm = 0.4373, lr_0 = 5.2183e-04
Loss = 3.1736e-02, PNorm = 161.5518, GNorm = 0.3090, lr_0 = 5.2147e-04
Loss = 2.9685e-02, PNorm = 161.6012, GNorm = 0.3576, lr_0 = 5.2112e-04
Loss = 3.5548e-02, PNorm = 161.6424, GNorm = 0.5903, lr_0 = 5.2076e-04
Loss = 3.2676e-02, PNorm = 161.6843, GNorm = 0.3080, lr_0 = 5.2040e-04
Loss = 3.3196e-02, PNorm = 161.7321, GNorm = 0.3797, lr_0 = 5.2005e-04
Loss = 3.1977e-02, PNorm = 161.7788, GNorm = 0.5047, lr_0 = 5.1969e-04
Loss = 2.9250e-02, PNorm = 161.8207, GNorm = 0.4715, lr_0 = 5.1933e-04
Loss = 3.7802e-02, PNorm = 161.8623, GNorm = 0.9924, lr_0 = 5.1898e-04
Loss = 2.9107e-02, PNorm = 161.9044, GNorm = 0.5332, lr_0 = 5.1862e-04
Loss = 3.3047e-02, PNorm = 161.9473, GNorm = 0.4256, lr_0 = 5.1827e-04
Loss = 3.5719e-02, PNorm = 161.9921, GNorm = 0.6922, lr_0 = 5.1791e-04
Validation mae = 0.123008
Epoch 10
Loss = 2.6807e-02, PNorm = 162.0303, GNorm = 0.2347, lr_0 = 5.1756e-04
Loss = 2.6677e-02, PNorm = 162.0613, GNorm = 0.2588, lr_0 = 5.1720e-04
Loss = 2.9300e-02, PNorm = 162.0940, GNorm = 0.4400, lr_0 = 5.1685e-04
Loss = 3.1787e-02, PNorm = 162.1217, GNorm = 0.3932, lr_0 = 5.1649e-04
Loss = 2.7618e-02, PNorm = 162.1520, GNorm = 0.3566, lr_0 = 5.1614e-04
Loss = 2.3903e-02, PNorm = 162.1821, GNorm = 0.3331, lr_0 = 5.1579e-04
Loss = 2.7192e-02, PNorm = 162.2139, GNorm = 0.3134, lr_0 = 5.1543e-04
Loss = 3.5743e-02, PNorm = 162.2494, GNorm = 0.4352, lr_0 = 5.1508e-04
Loss = 2.2689e-02, PNorm = 162.2779, GNorm = 0.2630, lr_0 = 5.1473e-04
Loss = 2.2895e-02, PNorm = 162.3060, GNorm = 0.2889, lr_0 = 5.1437e-04
Loss = 2.4313e-02, PNorm = 162.3286, GNorm = 0.3666, lr_0 = 5.1402e-04
Loss = 2.2239e-02, PNorm = 162.3533, GNorm = 0.3055, lr_0 = 5.1367e-04
Loss = 2.4800e-02, PNorm = 162.3798, GNorm = 0.3609, lr_0 = 5.1332e-04
Loss = 2.2167e-02, PNorm = 162.4037, GNorm = 0.4924, lr_0 = 5.1297e-04
Loss = 2.6115e-02, PNorm = 162.4260, GNorm = 0.4611, lr_0 = 5.1262e-04
Loss = 3.1185e-02, PNorm = 162.4498, GNorm = 0.7911, lr_0 = 5.1226e-04
Loss = 3.5041e-02, PNorm = 162.4802, GNorm = 0.3525, lr_0 = 5.1191e-04
Loss = 3.3430e-02, PNorm = 162.5081, GNorm = 0.3385, lr_0 = 5.1156e-04
Loss = 2.3082e-02, PNorm = 162.5398, GNorm = 0.4178, lr_0 = 5.1121e-04
Loss = 2.2948e-02, PNorm = 162.5665, GNorm = 0.5980, lr_0 = 5.1086e-04
Loss = 3.0551e-02, PNorm = 162.5952, GNorm = 0.5721, lr_0 = 5.1051e-04
Loss = 2.7857e-02, PNorm = 162.6216, GNorm = 0.2653, lr_0 = 5.1016e-04
Loss = 2.6787e-02, PNorm = 162.6446, GNorm = 0.4660, lr_0 = 5.0981e-04
Loss = 2.5274e-02, PNorm = 162.6714, GNorm = 0.7789, lr_0 = 5.0946e-04
Loss = 2.3847e-02, PNorm = 162.6969, GNorm = 0.2888, lr_0 = 5.0911e-04
Loss = 2.7263e-02, PNorm = 162.7271, GNorm = 0.2388, lr_0 = 5.0877e-04
Loss = 2.3064e-02, PNorm = 162.7569, GNorm = 0.3025, lr_0 = 5.0842e-04
Loss = 4.3247e-02, PNorm = 162.7895, GNorm = 0.3663, lr_0 = 5.0807e-04
Loss = 2.4444e-02, PNorm = 162.8183, GNorm = 0.3658, lr_0 = 5.0772e-04
Loss = 2.9666e-02, PNorm = 162.8514, GNorm = 0.2114, lr_0 = 5.0737e-04
Loss = 2.4232e-02, PNorm = 162.8819, GNorm = 0.2665, lr_0 = 5.0703e-04
Loss = 2.5775e-02, PNorm = 162.9119, GNorm = 0.2826, lr_0 = 5.0668e-04
Loss = 2.4606e-02, PNorm = 162.9430, GNorm = 0.4512, lr_0 = 5.0633e-04
Loss = 3.4655e-02, PNorm = 162.9707, GNorm = 0.3383, lr_0 = 5.0598e-04
Loss = 2.8476e-02, PNorm = 163.0005, GNorm = 0.3366, lr_0 = 5.0564e-04
Loss = 2.6684e-02, PNorm = 163.0260, GNorm = 0.2647, lr_0 = 5.0529e-04
Loss = 2.2610e-02, PNorm = 163.0548, GNorm = 0.2727, lr_0 = 5.0494e-04
Loss = 2.4446e-02, PNorm = 163.0877, GNorm = 0.2427, lr_0 = 5.0460e-04
Loss = 2.3655e-02, PNorm = 163.1162, GNorm = 0.2029, lr_0 = 5.0425e-04
Loss = 2.7843e-02, PNorm = 163.1478, GNorm = 0.2887, lr_0 = 5.0391e-04
Loss = 2.5228e-02, PNorm = 163.1786, GNorm = 0.3172, lr_0 = 5.0356e-04
Loss = 2.6197e-02, PNorm = 163.2061, GNorm = 0.2680, lr_0 = 5.0322e-04
Loss = 2.6282e-02, PNorm = 163.2355, GNorm = 0.4285, lr_0 = 5.0287e-04
Loss = 2.5101e-02, PNorm = 163.2636, GNorm = 0.3832, lr_0 = 5.0253e-04
Loss = 2.3024e-02, PNorm = 163.2930, GNorm = 0.2973, lr_0 = 5.0218e-04
Loss = 3.0423e-02, PNorm = 163.3242, GNorm = 0.4945, lr_0 = 5.0184e-04
Loss = 2.2965e-02, PNorm = 163.3583, GNorm = 0.4201, lr_0 = 5.0150e-04
Loss = 2.6696e-02, PNorm = 163.3892, GNorm = 0.2019, lr_0 = 5.0115e-04
Loss = 3.0959e-02, PNorm = 163.4218, GNorm = 0.2698, lr_0 = 5.0081e-04
Loss = 2.4962e-02, PNorm = 163.4484, GNorm = 0.4284, lr_0 = 5.0047e-04
Loss = 2.6821e-02, PNorm = 163.4804, GNorm = 0.5630, lr_0 = 5.0012e-04
Loss = 3.2565e-02, PNorm = 163.5098, GNorm = 0.4123, lr_0 = 4.9978e-04
Loss = 3.1018e-02, PNorm = 163.5416, GNorm = 1.0754, lr_0 = 4.9944e-04
Loss = 2.2042e-02, PNorm = 163.5753, GNorm = 0.4031, lr_0 = 4.9910e-04
Loss = 3.9485e-02, PNorm = 163.6023, GNorm = 0.4406, lr_0 = 4.9875e-04
Loss = 2.5336e-02, PNorm = 163.6329, GNorm = 0.5624, lr_0 = 4.9841e-04
Loss = 3.3470e-02, PNorm = 163.6643, GNorm = 0.3304, lr_0 = 4.9807e-04
Loss = 3.2316e-02, PNorm = 163.6960, GNorm = 0.4585, lr_0 = 4.9773e-04
Loss = 2.5127e-02, PNorm = 163.7301, GNorm = 0.3447, lr_0 = 4.9739e-04
Loss = 3.1454e-02, PNorm = 163.7648, GNorm = 0.6457, lr_0 = 4.9705e-04
Loss = 2.3937e-02, PNorm = 163.7991, GNorm = 0.3630, lr_0 = 4.9671e-04
Loss = 2.2997e-02, PNorm = 163.8323, GNorm = 0.2409, lr_0 = 4.9637e-04
Loss = 3.1963e-02, PNorm = 163.8653, GNorm = 0.5104, lr_0 = 4.9603e-04
Loss = 2.4790e-02, PNorm = 163.8997, GNorm = 0.2686, lr_0 = 4.9569e-04
Loss = 3.0672e-02, PNorm = 163.9339, GNorm = 0.1792, lr_0 = 4.9535e-04
Loss = 2.8414e-02, PNorm = 163.9667, GNorm = 0.2359, lr_0 = 4.9501e-04
Loss = 2.4428e-02, PNorm = 163.9997, GNorm = 0.4047, lr_0 = 4.9467e-04
Loss = 3.1761e-02, PNorm = 164.0305, GNorm = 0.3579, lr_0 = 4.9433e-04
Loss = 2.8825e-02, PNorm = 164.0637, GNorm = 0.3630, lr_0 = 4.9399e-04
Loss = 3.1319e-02, PNorm = 164.0983, GNorm = 0.3651, lr_0 = 4.9365e-04
Loss = 3.6290e-02, PNorm = 164.1290, GNorm = 0.5960, lr_0 = 4.9332e-04
Loss = 3.2479e-02, PNorm = 164.1650, GNorm = 0.4190, lr_0 = 4.9298e-04
Loss = 2.9799e-02, PNorm = 164.1991, GNorm = 0.3142, lr_0 = 4.9264e-04
Loss = 2.5570e-02, PNorm = 164.2311, GNorm = 0.2677, lr_0 = 4.9230e-04
Loss = 2.2842e-02, PNorm = 164.2583, GNorm = 0.4545, lr_0 = 4.9197e-04
Loss = 2.4406e-02, PNorm = 164.2916, GNorm = 0.4261, lr_0 = 4.9163e-04
Loss = 3.0734e-02, PNorm = 164.3268, GNorm = 0.3280, lr_0 = 4.9129e-04
Loss = 3.4618e-02, PNorm = 164.3662, GNorm = 0.6297, lr_0 = 4.9095e-04
Loss = 2.5988e-02, PNorm = 164.4008, GNorm = 0.4419, lr_0 = 4.9062e-04
Loss = 2.9339e-02, PNorm = 164.4364, GNorm = 0.3909, lr_0 = 4.9028e-04
Loss = 3.2020e-02, PNorm = 164.4718, GNorm = 0.5660, lr_0 = 4.8995e-04
Loss = 2.2262e-02, PNorm = 164.5036, GNorm = 0.2577, lr_0 = 4.8961e-04
Loss = 2.3188e-02, PNorm = 164.5412, GNorm = 0.3170, lr_0 = 4.8928e-04
Loss = 2.7167e-02, PNorm = 164.5786, GNorm = 0.3520, lr_0 = 4.8894e-04
Loss = 2.5685e-02, PNorm = 164.6141, GNorm = 0.2658, lr_0 = 4.8861e-04
Loss = 2.7026e-02, PNorm = 164.6508, GNorm = 0.3553, lr_0 = 4.8827e-04
Loss = 2.4138e-02, PNorm = 164.6828, GNorm = 0.4480, lr_0 = 4.8794e-04
Loss = 3.1021e-02, PNorm = 164.7152, GNorm = 0.3751, lr_0 = 4.8760e-04
Loss = 2.3148e-02, PNorm = 164.7486, GNorm = 0.2986, lr_0 = 4.8727e-04
Loss = 2.6495e-02, PNorm = 164.7797, GNorm = 0.2211, lr_0 = 4.8693e-04
Loss = 2.4222e-02, PNorm = 164.8112, GNorm = 0.4429, lr_0 = 4.8660e-04
Loss = 2.5820e-02, PNorm = 164.8474, GNorm = 0.4751, lr_0 = 4.8627e-04
Loss = 2.4658e-02, PNorm = 164.8817, GNorm = 0.5739, lr_0 = 4.8593e-04
Loss = 3.3290e-02, PNorm = 164.9131, GNorm = 0.2430, lr_0 = 4.8560e-04
Loss = 2.9256e-02, PNorm = 164.9464, GNorm = 0.3146, lr_0 = 4.8527e-04
Loss = 2.4850e-02, PNorm = 164.9839, GNorm = 0.4920, lr_0 = 4.8494e-04
Loss = 2.4692e-02, PNorm = 165.0234, GNorm = 0.2688, lr_0 = 4.8460e-04
Loss = 2.4144e-02, PNorm = 165.0574, GNorm = 0.3317, lr_0 = 4.8427e-04
Loss = 2.4584e-02, PNorm = 165.0915, GNorm = 0.2732, lr_0 = 4.8394e-04
Loss = 3.8514e-02, PNorm = 165.1284, GNorm = 0.4269, lr_0 = 4.8361e-04
Loss = 3.1927e-02, PNorm = 165.1648, GNorm = 0.2051, lr_0 = 4.8328e-04
Loss = 3.4328e-02, PNorm = 165.1964, GNorm = 0.6081, lr_0 = 4.8295e-04
Loss = 2.3217e-02, PNorm = 165.2283, GNorm = 0.2682, lr_0 = 4.8262e-04
Loss = 2.7120e-02, PNorm = 165.2600, GNorm = 0.1813, lr_0 = 4.8228e-04
Loss = 2.6798e-02, PNorm = 165.3003, GNorm = 0.5645, lr_0 = 4.8195e-04
Loss = 2.9237e-02, PNorm = 165.3386, GNorm = 0.2980, lr_0 = 4.8162e-04
Loss = 3.2340e-02, PNorm = 165.3729, GNorm = 0.8686, lr_0 = 4.8129e-04
Loss = 3.4881e-02, PNorm = 165.4122, GNorm = 0.2744, lr_0 = 4.8096e-04
Loss = 2.4028e-02, PNorm = 165.4489, GNorm = 0.4973, lr_0 = 4.8064e-04
Loss = 3.1421e-02, PNorm = 165.4826, GNorm = 0.6035, lr_0 = 4.8031e-04
Loss = 2.5237e-02, PNorm = 165.5234, GNorm = 0.5030, lr_0 = 4.7998e-04
Loss = 2.7959e-02, PNorm = 165.5663, GNorm = 0.5153, lr_0 = 4.7965e-04
Loss = 3.1135e-02, PNorm = 165.6038, GNorm = 0.8760, lr_0 = 4.7932e-04
Loss = 3.8511e-02, PNorm = 165.6444, GNorm = 0.3635, lr_0 = 4.7899e-04
Loss = 4.6418e-02, PNorm = 165.6929, GNorm = 0.4753, lr_0 = 4.7866e-04
Loss = 3.4088e-02, PNorm = 165.7327, GNorm = 0.9857, lr_0 = 4.7833e-04
Loss = 2.8540e-02, PNorm = 165.7728, GNorm = 0.3746, lr_0 = 4.7801e-04
Loss = 2.8446e-02, PNorm = 165.8100, GNorm = 0.2929, lr_0 = 4.7768e-04
Loss = 2.3735e-02, PNorm = 165.8395, GNorm = 0.6009, lr_0 = 4.7735e-04
Loss = 2.8878e-02, PNorm = 165.8741, GNorm = 0.3137, lr_0 = 4.7703e-04
Validation mae = 0.121920
Epoch 11
Loss = 3.0778e-02, PNorm = 165.9071, GNorm = 0.6246, lr_0 = 4.7670e-04
Loss = 2.7186e-02, PNorm = 165.9366, GNorm = 0.3864, lr_0 = 4.7637e-04
Loss = 2.4071e-02, PNorm = 165.9629, GNorm = 0.4763, lr_0 = 4.7605e-04
Loss = 2.9939e-02, PNorm = 165.9889, GNorm = 0.4518, lr_0 = 4.7572e-04
Loss = 2.8295e-02, PNorm = 166.0148, GNorm = 0.2931, lr_0 = 4.7539e-04
Loss = 2.3191e-02, PNorm = 166.0399, GNorm = 0.6691, lr_0 = 4.7507e-04
Loss = 2.4938e-02, PNorm = 166.0623, GNorm = 0.2739, lr_0 = 4.7474e-04
Loss = 2.6517e-02, PNorm = 166.0895, GNorm = 0.2820, lr_0 = 4.7442e-04
Loss = 2.1094e-02, PNorm = 166.1173, GNorm = 0.1627, lr_0 = 4.7409e-04
Loss = 2.1601e-02, PNorm = 166.1447, GNorm = 0.3470, lr_0 = 4.7377e-04
Loss = 2.5557e-02, PNorm = 166.1719, GNorm = 0.2686, lr_0 = 4.7344e-04
Loss = 2.3383e-02, PNorm = 166.2017, GNorm = 0.3092, lr_0 = 4.7312e-04
Loss = 2.3041e-02, PNorm = 166.2312, GNorm = 0.3225, lr_0 = 4.7279e-04
Loss = 2.1236e-02, PNorm = 166.2577, GNorm = 0.3228, lr_0 = 4.7247e-04
Loss = 2.0827e-02, PNorm = 166.2829, GNorm = 0.2242, lr_0 = 4.7215e-04
Loss = 2.8065e-02, PNorm = 166.3067, GNorm = 0.5059, lr_0 = 4.7182e-04
Loss = 2.4591e-02, PNorm = 166.3297, GNorm = 0.2606, lr_0 = 4.7150e-04
Loss = 2.5771e-02, PNorm = 166.3531, GNorm = 0.2757, lr_0 = 4.7118e-04
Loss = 2.9130e-02, PNorm = 166.3802, GNorm = 0.4319, lr_0 = 4.7085e-04
Loss = 2.7756e-02, PNorm = 166.4074, GNorm = 0.2754, lr_0 = 4.7053e-04
Loss = 1.8427e-02, PNorm = 166.4345, GNorm = 0.4122, lr_0 = 4.7021e-04
Loss = 1.9312e-02, PNorm = 166.4579, GNorm = 0.3979, lr_0 = 4.6989e-04
Loss = 2.1709e-02, PNorm = 166.4810, GNorm = 0.5177, lr_0 = 4.6957e-04
Loss = 1.8192e-02, PNorm = 166.5045, GNorm = 0.2710, lr_0 = 4.6924e-04
Loss = 2.9147e-02, PNorm = 166.5262, GNorm = 0.2237, lr_0 = 4.6892e-04
Loss = 2.1266e-02, PNorm = 166.5538, GNorm = 0.5081, lr_0 = 4.6860e-04
Loss = 1.9413e-02, PNorm = 166.5809, GNorm = 0.4035, lr_0 = 4.6828e-04
Loss = 1.6281e-02, PNorm = 166.6005, GNorm = 0.1534, lr_0 = 4.6796e-04
Loss = 2.5304e-02, PNorm = 166.6236, GNorm = 0.5520, lr_0 = 4.6764e-04
Loss = 2.0457e-02, PNorm = 166.6490, GNorm = 0.3124, lr_0 = 4.6732e-04
Loss = 2.2680e-02, PNorm = 166.6754, GNorm = 0.7723, lr_0 = 4.6700e-04
Loss = 1.9972e-02, PNorm = 166.7046, GNorm = 0.5406, lr_0 = 4.6668e-04
Loss = 3.2559e-02, PNorm = 166.7306, GNorm = 0.7593, lr_0 = 4.6636e-04
Loss = 2.4815e-02, PNorm = 166.7526, GNorm = 0.2953, lr_0 = 4.6604e-04
Loss = 2.3372e-02, PNorm = 166.7768, GNorm = 0.8918, lr_0 = 4.6572e-04
Loss = 2.3352e-02, PNorm = 166.7993, GNorm = 0.4048, lr_0 = 4.6540e-04
Loss = 2.4450e-02, PNorm = 166.8285, GNorm = 0.3846, lr_0 = 4.6508e-04
Loss = 1.9856e-02, PNorm = 166.8521, GNorm = 0.2847, lr_0 = 4.6476e-04
Loss = 2.6764e-02, PNorm = 166.8792, GNorm = 0.4024, lr_0 = 4.6445e-04
Loss = 2.0849e-02, PNorm = 166.9087, GNorm = 0.3104, lr_0 = 4.6413e-04
Loss = 1.7782e-02, PNorm = 166.9361, GNorm = 0.2921, lr_0 = 4.6381e-04
Loss = 3.3588e-02, PNorm = 166.9659, GNorm = 0.4186, lr_0 = 4.6349e-04
Loss = 2.0015e-02, PNorm = 166.9939, GNorm = 0.5180, lr_0 = 4.6317e-04
Loss = 2.0882e-02, PNorm = 167.0174, GNorm = 0.2503, lr_0 = 4.6286e-04
Loss = 2.5807e-02, PNorm = 167.0412, GNorm = 0.2921, lr_0 = 4.6254e-04
Loss = 2.4781e-02, PNorm = 167.0697, GNorm = 0.3626, lr_0 = 4.6222e-04
Loss = 2.0012e-02, PNorm = 167.0959, GNorm = 0.5356, lr_0 = 4.6191e-04
Loss = 2.4745e-02, PNorm = 167.1279, GNorm = 0.2319, lr_0 = 4.6159e-04
Loss = 2.5935e-02, PNorm = 167.1612, GNorm = 0.4372, lr_0 = 4.6127e-04
Loss = 2.2794e-02, PNorm = 167.1886, GNorm = 0.2224, lr_0 = 4.6096e-04
Loss = 2.3768e-02, PNorm = 167.2178, GNorm = 0.4079, lr_0 = 4.6064e-04
Loss = 2.6517e-02, PNorm = 167.2438, GNorm = 0.1935, lr_0 = 4.6033e-04
Loss = 2.0307e-02, PNorm = 167.2668, GNorm = 0.2960, lr_0 = 4.6001e-04
Loss = 2.3580e-02, PNorm = 167.2940, GNorm = 0.2128, lr_0 = 4.5970e-04
Loss = 2.9253e-02, PNorm = 167.3191, GNorm = 0.3486, lr_0 = 4.5938e-04
Loss = 2.9139e-02, PNorm = 167.3474, GNorm = 0.5343, lr_0 = 4.5907e-04
Loss = 2.2919e-02, PNorm = 167.3782, GNorm = 0.3845, lr_0 = 4.5875e-04
Loss = 1.7826e-02, PNorm = 167.4049, GNorm = 0.2165, lr_0 = 4.5844e-04
Loss = 2.2830e-02, PNorm = 167.4296, GNorm = 0.2199, lr_0 = 4.5812e-04
Loss = 2.1200e-02, PNorm = 167.4616, GNorm = 0.3316, lr_0 = 4.5781e-04
Loss = 2.7704e-02, PNorm = 167.4890, GNorm = 0.4544, lr_0 = 4.5750e-04
Loss = 2.1322e-02, PNorm = 167.5180, GNorm = 0.2494, lr_0 = 4.5718e-04
Loss = 2.3674e-02, PNorm = 167.5493, GNorm = 0.2445, lr_0 = 4.5687e-04
Loss = 2.6089e-02, PNorm = 167.5754, GNorm = 0.5798, lr_0 = 4.5656e-04
Loss = 2.5317e-02, PNorm = 167.6011, GNorm = 0.6119, lr_0 = 4.5624e-04
Loss = 2.4711e-02, PNorm = 167.6321, GNorm = 0.3740, lr_0 = 4.5593e-04
Loss = 2.0535e-02, PNorm = 167.6614, GNorm = 0.3596, lr_0 = 4.5562e-04
Loss = 2.3912e-02, PNorm = 167.6924, GNorm = 0.4931, lr_0 = 4.5531e-04
Loss = 2.0589e-02, PNorm = 167.7285, GNorm = 0.5401, lr_0 = 4.5499e-04
Loss = 2.2386e-02, PNorm = 167.7533, GNorm = 0.5044, lr_0 = 4.5468e-04
Loss = 2.5904e-02, PNorm = 167.7796, GNorm = 0.3268, lr_0 = 4.5437e-04
Loss = 1.8851e-02, PNorm = 167.8058, GNorm = 0.2397, lr_0 = 4.5406e-04
Loss = 3.0974e-02, PNorm = 167.8377, GNorm = 0.6115, lr_0 = 4.5375e-04
Loss = 2.2683e-02, PNorm = 167.8674, GNorm = 0.2390, lr_0 = 4.5344e-04
Loss = 2.7879e-02, PNorm = 167.8920, GNorm = 0.2524, lr_0 = 4.5313e-04
Loss = 4.3382e-02, PNorm = 167.9206, GNorm = 0.3035, lr_0 = 4.5282e-04
Loss = 2.6482e-02, PNorm = 167.9486, GNorm = 0.3166, lr_0 = 4.5251e-04
Loss = 1.9574e-02, PNorm = 167.9764, GNorm = 0.5287, lr_0 = 4.5220e-04
Loss = 2.3342e-02, PNorm = 168.0085, GNorm = 0.4748, lr_0 = 4.5189e-04
Loss = 2.0011e-02, PNorm = 168.0346, GNorm = 0.2091, lr_0 = 4.5158e-04
Loss = 2.5276e-02, PNorm = 168.0622, GNorm = 1.0582, lr_0 = 4.5127e-04
Loss = 2.3947e-02, PNorm = 168.0833, GNorm = 0.3727, lr_0 = 4.5096e-04
Loss = 3.1451e-02, PNorm = 168.1144, GNorm = 0.4799, lr_0 = 4.5065e-04
Loss = 3.7458e-02, PNorm = 168.1442, GNorm = 0.3062, lr_0 = 4.5034e-04
Loss = 2.3867e-02, PNorm = 168.1774, GNorm = 0.3062, lr_0 = 4.5003e-04
Loss = 2.6259e-02, PNorm = 168.2059, GNorm = 0.6326, lr_0 = 4.4972e-04
Loss = 2.2217e-02, PNorm = 168.2385, GNorm = 0.7042, lr_0 = 4.4942e-04
Loss = 1.9021e-02, PNorm = 168.2672, GNorm = 0.2875, lr_0 = 4.4911e-04
Loss = 2.5808e-02, PNorm = 168.2979, GNorm = 0.3881, lr_0 = 4.4880e-04
Loss = 2.0222e-02, PNorm = 168.3264, GNorm = 0.2858, lr_0 = 4.4849e-04
Loss = 2.2773e-02, PNorm = 168.3580, GNorm = 0.5780, lr_0 = 4.4819e-04
Loss = 1.7162e-02, PNorm = 168.3870, GNorm = 0.2733, lr_0 = 4.4788e-04
Loss = 1.7783e-02, PNorm = 168.4113, GNorm = 0.2603, lr_0 = 4.4757e-04
Loss = 2.4356e-02, PNorm = 168.4387, GNorm = 0.4912, lr_0 = 4.4727e-04
Loss = 1.7428e-02, PNorm = 168.4665, GNorm = 0.2100, lr_0 = 4.4696e-04
Loss = 2.2000e-02, PNorm = 168.4961, GNorm = 0.3180, lr_0 = 4.4665e-04
Loss = 2.4632e-02, PNorm = 168.5271, GNorm = 0.4083, lr_0 = 4.4635e-04
Loss = 2.7794e-02, PNorm = 168.5543, GNorm = 0.3073, lr_0 = 4.4604e-04
Loss = 2.3256e-02, PNorm = 168.5860, GNorm = 0.3165, lr_0 = 4.4574e-04
Loss = 2.1431e-02, PNorm = 168.6172, GNorm = 0.3300, lr_0 = 4.4543e-04
Loss = 1.8448e-02, PNorm = 168.6478, GNorm = 0.2453, lr_0 = 4.4513e-04
Loss = 2.2196e-02, PNorm = 168.6781, GNorm = 0.3063, lr_0 = 4.4482e-04
Loss = 2.3925e-02, PNorm = 168.7090, GNorm = 0.2668, lr_0 = 4.4452e-04
Loss = 2.2180e-02, PNorm = 168.7363, GNorm = 0.2103, lr_0 = 4.4421e-04
Loss = 1.9747e-02, PNorm = 168.7657, GNorm = 0.2777, lr_0 = 4.4391e-04
Loss = 2.2934e-02, PNorm = 168.7989, GNorm = 0.2516, lr_0 = 4.4360e-04
Loss = 1.7783e-02, PNorm = 168.8320, GNorm = 0.4147, lr_0 = 4.4330e-04
Loss = 2.1205e-02, PNorm = 168.8629, GNorm = 0.2851, lr_0 = 4.4299e-04
Loss = 1.8777e-02, PNorm = 168.8879, GNorm = 0.3696, lr_0 = 4.4269e-04
Loss = 2.2250e-02, PNorm = 168.9127, GNorm = 0.2202, lr_0 = 4.4239e-04
Loss = 2.4580e-02, PNorm = 168.9464, GNorm = 0.8088, lr_0 = 4.4209e-04
Loss = 2.2293e-02, PNorm = 168.9791, GNorm = 0.2298, lr_0 = 4.4178e-04
Loss = 2.1143e-02, PNorm = 169.0098, GNorm = 0.2491, lr_0 = 4.4148e-04
Loss = 1.7643e-02, PNorm = 169.0381, GNorm = 0.3251, lr_0 = 4.4118e-04
Loss = 2.6523e-02, PNorm = 169.0683, GNorm = 0.6388, lr_0 = 4.4088e-04
Loss = 2.5123e-02, PNorm = 169.0987, GNorm = 0.2301, lr_0 = 4.4057e-04
Loss = 2.1709e-02, PNorm = 169.1281, GNorm = 0.3541, lr_0 = 4.4027e-04
Loss = 3.3740e-02, PNorm = 169.1630, GNorm = 0.5131, lr_0 = 4.3997e-04
Loss = 2.8446e-02, PNorm = 169.1980, GNorm = 0.5602, lr_0 = 4.3967e-04
Loss = 2.2607e-02, PNorm = 169.2350, GNorm = 0.2395, lr_0 = 4.3937e-04
Validation mae = 0.122395
Epoch 12
Loss = 1.9349e-02, PNorm = 169.2590, GNorm = 0.3583, lr_0 = 4.3907e-04
Loss = 2.1396e-02, PNorm = 169.2785, GNorm = 0.3075, lr_0 = 4.3877e-04
Loss = 1.6518e-02, PNorm = 169.2971, GNorm = 0.1977, lr_0 = 4.3846e-04
Loss = 2.2591e-02, PNorm = 169.3181, GNorm = 0.2314, lr_0 = 4.3816e-04
Loss = 1.7182e-02, PNorm = 169.3372, GNorm = 0.2220, lr_0 = 4.3786e-04
Loss = 1.6899e-02, PNorm = 169.3544, GNorm = 0.2807, lr_0 = 4.3756e-04
Loss = 1.7383e-02, PNorm = 169.3747, GNorm = 0.6162, lr_0 = 4.3726e-04
Loss = 2.3052e-02, PNorm = 169.3906, GNorm = 0.1899, lr_0 = 4.3696e-04
Loss = 1.8179e-02, PNorm = 169.4096, GNorm = 0.3819, lr_0 = 4.3667e-04
Loss = 2.1595e-02, PNorm = 169.4287, GNorm = 0.4446, lr_0 = 4.3637e-04
Loss = 1.8577e-02, PNorm = 169.4509, GNorm = 0.2838, lr_0 = 4.3607e-04
Loss = 1.8134e-02, PNorm = 169.4740, GNorm = 0.1652, lr_0 = 4.3577e-04
Loss = 1.6683e-02, PNorm = 169.4930, GNorm = 0.1884, lr_0 = 4.3547e-04
Loss = 1.8186e-02, PNorm = 169.5111, GNorm = 0.4021, lr_0 = 4.3517e-04
Loss = 2.2780e-02, PNorm = 169.5290, GNorm = 0.2706, lr_0 = 4.3487e-04
Loss = 1.7634e-02, PNorm = 169.5503, GNorm = 0.1937, lr_0 = 4.3458e-04
Loss = 1.8709e-02, PNorm = 169.5686, GNorm = 0.3942, lr_0 = 4.3428e-04
Loss = 1.9230e-02, PNorm = 169.5848, GNorm = 0.4964, lr_0 = 4.3398e-04
Loss = 1.6677e-02, PNorm = 169.6015, GNorm = 0.3520, lr_0 = 4.3368e-04
Loss = 1.7798e-02, PNorm = 169.6204, GNorm = 0.3186, lr_0 = 4.3339e-04
Loss = 1.5805e-02, PNorm = 169.6395, GNorm = 0.1301, lr_0 = 4.3309e-04
Loss = 1.6645e-02, PNorm = 169.6556, GNorm = 0.2007, lr_0 = 4.3279e-04
Loss = 1.4806e-02, PNorm = 169.6731, GNorm = 0.5674, lr_0 = 4.3250e-04
Loss = 1.6300e-02, PNorm = 169.6901, GNorm = 0.2323, lr_0 = 4.3220e-04
Loss = 1.5873e-02, PNorm = 169.7069, GNorm = 0.2797, lr_0 = 4.3190e-04
Loss = 1.4108e-02, PNorm = 169.7289, GNorm = 0.1785, lr_0 = 4.3161e-04
Loss = 1.7606e-02, PNorm = 169.7466, GNorm = 0.2485, lr_0 = 4.3131e-04
Loss = 1.8838e-02, PNorm = 169.7656, GNorm = 0.3388, lr_0 = 4.3102e-04
Loss = 1.9029e-02, PNorm = 169.7880, GNorm = 0.2166, lr_0 = 4.3072e-04
Loss = 2.1101e-02, PNorm = 169.8082, GNorm = 0.1663, lr_0 = 4.3043e-04
Loss = 1.6224e-02, PNorm = 169.8258, GNorm = 0.3178, lr_0 = 4.3013e-04
Loss = 2.0244e-02, PNorm = 169.8466, GNorm = 0.3316, lr_0 = 4.2984e-04
Loss = 1.5541e-02, PNorm = 169.8652, GNorm = 0.2534, lr_0 = 4.2954e-04
Loss = 1.7561e-02, PNorm = 169.8857, GNorm = 0.3507, lr_0 = 4.2925e-04
Loss = 1.5511e-02, PNorm = 169.9038, GNorm = 0.3571, lr_0 = 4.2895e-04
Loss = 1.9589e-02, PNorm = 169.9263, GNorm = 0.1894, lr_0 = 4.2866e-04
Loss = 1.9448e-02, PNorm = 169.9454, GNorm = 0.3245, lr_0 = 4.2837e-04
Loss = 1.4839e-02, PNorm = 169.9650, GNorm = 0.2288, lr_0 = 4.2807e-04
Loss = 2.1813e-02, PNorm = 169.9859, GNorm = 0.2362, lr_0 = 4.2778e-04
Loss = 2.3023e-02, PNorm = 170.0057, GNorm = 0.5726, lr_0 = 4.2749e-04
Loss = 1.7901e-02, PNorm = 170.0244, GNorm = 0.2215, lr_0 = 4.2719e-04
Loss = 2.0223e-02, PNorm = 170.0427, GNorm = 0.2034, lr_0 = 4.2690e-04
Loss = 1.6831e-02, PNorm = 170.0612, GNorm = 0.2780, lr_0 = 4.2661e-04
Loss = 1.7720e-02, PNorm = 170.0809, GNorm = 0.7103, lr_0 = 4.2632e-04
Loss = 1.9907e-02, PNorm = 170.1016, GNorm = 0.3798, lr_0 = 4.2602e-04
Loss = 1.8184e-02, PNorm = 170.1220, GNorm = 0.3766, lr_0 = 4.2573e-04
Loss = 1.7681e-02, PNorm = 170.1421, GNorm = 0.3900, lr_0 = 4.2544e-04
Loss = 1.7594e-02, PNorm = 170.1631, GNorm = 0.1789, lr_0 = 4.2515e-04
Loss = 1.6675e-02, PNorm = 170.1847, GNorm = 0.3565, lr_0 = 4.2486e-04
Loss = 1.7004e-02, PNorm = 170.2047, GNorm = 0.4848, lr_0 = 4.2457e-04
Loss = 1.9245e-02, PNorm = 170.2244, GNorm = 0.3891, lr_0 = 4.2428e-04
Loss = 1.8407e-02, PNorm = 170.2464, GNorm = 0.4219, lr_0 = 4.2399e-04
Loss = 2.2559e-02, PNorm = 170.2688, GNorm = 0.3104, lr_0 = 4.2370e-04
Loss = 1.7250e-02, PNorm = 170.2927, GNorm = 0.1981, lr_0 = 4.2340e-04
Loss = 1.9020e-02, PNorm = 170.3174, GNorm = 0.5972, lr_0 = 4.2311e-04
Loss = 2.5149e-02, PNorm = 170.3406, GNorm = 0.3302, lr_0 = 4.2283e-04
Loss = 1.8610e-02, PNorm = 170.3639, GNorm = 0.3561, lr_0 = 4.2254e-04
Loss = 1.6274e-02, PNorm = 170.3886, GNorm = 0.2220, lr_0 = 4.2225e-04
Loss = 2.0610e-02, PNorm = 170.4102, GNorm = 0.4001, lr_0 = 4.2196e-04
Loss = 1.6433e-02, PNorm = 170.4303, GNorm = 0.4277, lr_0 = 4.2167e-04
Loss = 1.7899e-02, PNorm = 170.4552, GNorm = 0.3143, lr_0 = 4.2138e-04
Loss = 2.2888e-02, PNorm = 170.4779, GNorm = 0.5589, lr_0 = 4.2109e-04
Loss = 1.7214e-02, PNorm = 170.5061, GNorm = 0.2129, lr_0 = 4.2080e-04
Loss = 1.6854e-02, PNorm = 170.5315, GNorm = 0.3472, lr_0 = 4.2051e-04
Loss = 1.7054e-02, PNorm = 170.5520, GNorm = 0.2850, lr_0 = 4.2023e-04
Loss = 2.0161e-02, PNorm = 170.5749, GNorm = 0.4214, lr_0 = 4.1994e-04
Loss = 1.5682e-02, PNorm = 170.5993, GNorm = 0.2996, lr_0 = 4.1965e-04
Loss = 1.6047e-02, PNorm = 170.6244, GNorm = 0.3399, lr_0 = 4.1936e-04
Loss = 2.0845e-02, PNorm = 170.6519, GNorm = 0.5329, lr_0 = 4.1907e-04
Loss = 1.8282e-02, PNorm = 170.6814, GNorm = 0.2513, lr_0 = 4.1879e-04
Loss = 2.2294e-02, PNorm = 170.7044, GNorm = 0.2917, lr_0 = 4.1850e-04
Loss = 2.0274e-02, PNorm = 170.7286, GNorm = 0.6101, lr_0 = 4.1821e-04
Loss = 2.0646e-02, PNorm = 170.7546, GNorm = 0.3375, lr_0 = 4.1793e-04
Loss = 1.5069e-02, PNorm = 170.7761, GNorm = 0.3157, lr_0 = 4.1764e-04
Loss = 2.0392e-02, PNorm = 170.7982, GNorm = 0.2995, lr_0 = 4.1736e-04
Loss = 2.5173e-02, PNorm = 170.8182, GNorm = 0.2976, lr_0 = 4.1707e-04
Loss = 2.2136e-02, PNorm = 170.8333, GNorm = 0.4879, lr_0 = 4.1678e-04
Loss = 1.6186e-02, PNorm = 170.8553, GNorm = 0.2304, lr_0 = 4.1650e-04
Loss = 2.9779e-02, PNorm = 170.8798, GNorm = 0.1923, lr_0 = 4.1621e-04
Loss = 1.6490e-02, PNorm = 170.9062, GNorm = 0.1505, lr_0 = 4.1593e-04
Loss = 2.1229e-02, PNorm = 170.9349, GNorm = 0.5057, lr_0 = 4.1564e-04
Loss = 2.6452e-02, PNorm = 170.9624, GNorm = 0.4356, lr_0 = 4.1536e-04
Loss = 1.5455e-02, PNorm = 170.9841, GNorm = 0.2995, lr_0 = 4.1507e-04
Loss = 2.1649e-02, PNorm = 171.0048, GNorm = 0.1976, lr_0 = 4.1479e-04
Loss = 1.8412e-02, PNorm = 171.0271, GNorm = 0.3014, lr_0 = 4.1450e-04
Loss = 2.3385e-02, PNorm = 171.0519, GNorm = 0.4557, lr_0 = 4.1422e-04
Loss = 2.2842e-02, PNorm = 171.0759, GNorm = 0.5233, lr_0 = 4.1394e-04
Loss = 2.0918e-02, PNorm = 171.1032, GNorm = 0.4361, lr_0 = 4.1365e-04
Loss = 1.6300e-02, PNorm = 171.1302, GNorm = 0.3229, lr_0 = 4.1337e-04
Loss = 2.1084e-02, PNorm = 171.1567, GNorm = 0.2098, lr_0 = 4.1309e-04
Loss = 2.1647e-02, PNorm = 171.1825, GNorm = 0.2988, lr_0 = 4.1280e-04
Loss = 1.6922e-02, PNorm = 171.2103, GNorm = 0.3057, lr_0 = 4.1252e-04
Loss = 1.8829e-02, PNorm = 171.2332, GNorm = 0.2227, lr_0 = 4.1224e-04
Loss = 1.7769e-02, PNorm = 171.2546, GNorm = 0.1804, lr_0 = 4.1196e-04
Loss = 2.1028e-02, PNorm = 171.2801, GNorm = 0.3130, lr_0 = 4.1167e-04
Loss = 2.3749e-02, PNorm = 171.3054, GNorm = 0.2388, lr_0 = 4.1139e-04
Loss = 2.1811e-02, PNorm = 171.3317, GNorm = 0.3480, lr_0 = 4.1111e-04
Loss = 1.6467e-02, PNorm = 171.3552, GNorm = 0.3651, lr_0 = 4.1083e-04
Loss = 1.9484e-02, PNorm = 171.3850, GNorm = 0.2326, lr_0 = 4.1055e-04
Loss = 1.9435e-02, PNorm = 171.4141, GNorm = 0.5214, lr_0 = 4.1027e-04
Loss = 2.1062e-02, PNorm = 171.4408, GNorm = 0.2727, lr_0 = 4.0998e-04
Loss = 1.9006e-02, PNorm = 171.4678, GNorm = 0.5190, lr_0 = 4.0970e-04
Loss = 2.0651e-02, PNorm = 171.4914, GNorm = 0.2892, lr_0 = 4.0942e-04
Loss = 1.9512e-02, PNorm = 171.5139, GNorm = 0.3392, lr_0 = 4.0914e-04
Loss = 2.1154e-02, PNorm = 171.5391, GNorm = 0.3439, lr_0 = 4.0886e-04
Loss = 1.6333e-02, PNorm = 171.5626, GNorm = 0.3649, lr_0 = 4.0858e-04
Loss = 2.0054e-02, PNorm = 171.5877, GNorm = 0.1936, lr_0 = 4.0830e-04
Loss = 1.7341e-02, PNorm = 171.6113, GNorm = 0.2016, lr_0 = 4.0802e-04
Loss = 2.4692e-02, PNorm = 171.6371, GNorm = 0.2527, lr_0 = 4.0774e-04
Loss = 2.2591e-02, PNorm = 171.6626, GNorm = 0.2422, lr_0 = 4.0746e-04
Loss = 2.9150e-02, PNorm = 171.6846, GNorm = 0.2295, lr_0 = 4.0718e-04
Loss = 4.4123e-02, PNorm = 171.7121, GNorm = 0.2898, lr_0 = 4.0691e-04
Loss = 2.2533e-02, PNorm = 171.7364, GNorm = 0.5418, lr_0 = 4.0663e-04
Loss = 2.2952e-02, PNorm = 171.7661, GNorm = 0.3627, lr_0 = 4.0635e-04
Loss = 2.2828e-02, PNorm = 171.7948, GNorm = 0.2426, lr_0 = 4.0607e-04
Loss = 2.3468e-02, PNorm = 171.8201, GNorm = 0.2098, lr_0 = 4.0579e-04
Loss = 1.6933e-02, PNorm = 171.8419, GNorm = 0.3069, lr_0 = 4.0551e-04
Loss = 2.2795e-02, PNorm = 171.8684, GNorm = 0.3859, lr_0 = 4.0524e-04
Loss = 1.7782e-02, PNorm = 171.8968, GNorm = 0.2611, lr_0 = 4.0496e-04
Loss = 1.9466e-02, PNorm = 171.9227, GNorm = 0.2485, lr_0 = 4.0468e-04
Validation mae = 0.121285
Epoch 13
Loss = 1.4203e-02, PNorm = 171.9436, GNorm = 0.2296, lr_0 = 4.0440e-04
Loss = 1.5106e-02, PNorm = 171.9607, GNorm = 0.2847, lr_0 = 4.0413e-04
Loss = 2.6327e-02, PNorm = 171.9771, GNorm = 0.3301, lr_0 = 4.0385e-04
Loss = 1.7965e-02, PNorm = 171.9942, GNorm = 0.2936, lr_0 = 4.0357e-04
Loss = 1.4299e-02, PNorm = 172.0091, GNorm = 0.3245, lr_0 = 4.0330e-04
Loss = 1.9887e-02, PNorm = 172.0272, GNorm = 0.4813, lr_0 = 4.0302e-04
Loss = 1.5099e-02, PNorm = 172.0471, GNorm = 0.3282, lr_0 = 4.0274e-04
Loss = 1.8593e-02, PNorm = 172.0617, GNorm = 0.4938, lr_0 = 4.0247e-04
Loss = 2.0744e-02, PNorm = 172.0830, GNorm = 0.4542, lr_0 = 4.0219e-04
Loss = 1.2937e-02, PNorm = 172.1012, GNorm = 0.2077, lr_0 = 4.0192e-04
Loss = 1.5475e-02, PNorm = 172.1177, GNorm = 0.1533, lr_0 = 4.0164e-04
Loss = 1.4852e-02, PNorm = 172.1322, GNorm = 0.1341, lr_0 = 4.0137e-04
Loss = 1.4645e-02, PNorm = 172.1470, GNorm = 0.1717, lr_0 = 4.0109e-04
Loss = 1.5348e-02, PNorm = 172.1628, GNorm = 0.2706, lr_0 = 4.0082e-04
Loss = 1.7028e-02, PNorm = 172.1781, GNorm = 0.2999, lr_0 = 4.0054e-04
Loss = 1.4425e-02, PNorm = 172.1970, GNorm = 0.2005, lr_0 = 4.0027e-04
Loss = 1.2295e-02, PNorm = 172.2125, GNorm = 0.4545, lr_0 = 3.9999e-04
Loss = 1.2353e-02, PNorm = 172.2306, GNorm = 0.1723, lr_0 = 3.9972e-04
Loss = 1.3179e-02, PNorm = 172.2475, GNorm = 0.2506, lr_0 = 3.9945e-04
Loss = 1.5294e-02, PNorm = 172.2648, GNorm = 0.2155, lr_0 = 3.9917e-04
Loss = 1.6995e-02, PNorm = 172.2840, GNorm = 0.1819, lr_0 = 3.9890e-04
Loss = 1.3373e-02, PNorm = 172.2981, GNorm = 0.2324, lr_0 = 3.9863e-04
Loss = 1.7551e-02, PNorm = 172.3121, GNorm = 0.2909, lr_0 = 3.9835e-04
Loss = 1.4374e-02, PNorm = 172.3276, GNorm = 0.1660, lr_0 = 3.9808e-04
Loss = 1.8615e-02, PNorm = 172.3403, GNorm = 0.3110, lr_0 = 3.9781e-04
Loss = 1.4970e-02, PNorm = 172.3563, GNorm = 0.1777, lr_0 = 3.9753e-04
Loss = 1.7090e-02, PNorm = 172.3717, GNorm = 0.3032, lr_0 = 3.9726e-04
Loss = 1.2614e-02, PNorm = 172.3882, GNorm = 0.1972, lr_0 = 3.9699e-04
Loss = 1.9761e-02, PNorm = 172.4037, GNorm = 0.1727, lr_0 = 3.9672e-04
Loss = 1.6489e-02, PNorm = 172.4198, GNorm = 0.3089, lr_0 = 3.9645e-04
Loss = 1.6863e-02, PNorm = 172.4385, GNorm = 0.2994, lr_0 = 3.9617e-04
Loss = 1.4350e-02, PNorm = 172.4574, GNorm = 0.3337, lr_0 = 3.9590e-04
Loss = 1.6547e-02, PNorm = 172.4763, GNorm = 0.2664, lr_0 = 3.9563e-04
Loss = 1.4279e-02, PNorm = 172.4933, GNorm = 0.6848, lr_0 = 3.9536e-04
Loss = 1.8119e-02, PNorm = 172.5075, GNorm = 0.4981, lr_0 = 3.9509e-04
Loss = 1.4942e-02, PNorm = 172.5237, GNorm = 0.2144, lr_0 = 3.9482e-04
Loss = 1.4928e-02, PNorm = 172.5414, GNorm = 0.3054, lr_0 = 3.9455e-04
Loss = 1.7940e-02, PNorm = 172.5593, GNorm = 0.3206, lr_0 = 3.9428e-04
Loss = 1.3283e-02, PNorm = 172.5764, GNorm = 0.2943, lr_0 = 3.9401e-04
Loss = 1.7474e-02, PNorm = 172.5944, GNorm = 0.1682, lr_0 = 3.9374e-04
Loss = 1.6028e-02, PNorm = 172.6101, GNorm = 0.2468, lr_0 = 3.9347e-04
Loss = 1.5922e-02, PNorm = 172.6270, GNorm = 0.1672, lr_0 = 3.9320e-04
Loss = 1.4349e-02, PNorm = 172.6443, GNorm = 0.4538, lr_0 = 3.9293e-04
Loss = 1.2237e-02, PNorm = 172.6631, GNorm = 0.1508, lr_0 = 3.9266e-04
Loss = 1.2913e-02, PNorm = 172.6833, GNorm = 0.4261, lr_0 = 3.9239e-04
Loss = 2.1340e-02, PNorm = 172.6997, GNorm = 0.9162, lr_0 = 3.9212e-04
Loss = 2.0380e-02, PNorm = 172.7110, GNorm = 0.3210, lr_0 = 3.9185e-04
Loss = 1.3452e-02, PNorm = 172.7260, GNorm = 0.2940, lr_0 = 3.9159e-04
Loss = 1.6924e-02, PNorm = 172.7428, GNorm = 0.2141, lr_0 = 3.9132e-04
Loss = 1.2282e-02, PNorm = 172.7603, GNorm = 0.2353, lr_0 = 3.9105e-04
Loss = 2.3272e-02, PNorm = 172.7813, GNorm = 0.4623, lr_0 = 3.9078e-04
Loss = 1.5184e-02, PNorm = 172.8025, GNorm = 0.2297, lr_0 = 3.9051e-04
Loss = 1.5676e-02, PNorm = 172.8198, GNorm = 0.2769, lr_0 = 3.9025e-04
Loss = 1.7397e-02, PNorm = 172.8372, GNorm = 0.3754, lr_0 = 3.8998e-04
Loss = 1.4942e-02, PNorm = 172.8531, GNorm = 0.1878, lr_0 = 3.8971e-04
Loss = 1.3451e-02, PNorm = 172.8722, GNorm = 0.0953, lr_0 = 3.8945e-04
Loss = 1.5217e-02, PNorm = 172.8899, GNorm = 0.2020, lr_0 = 3.8918e-04
Loss = 1.5629e-02, PNorm = 172.9065, GNorm = 0.5490, lr_0 = 3.8891e-04
Loss = 1.7246e-02, PNorm = 172.9209, GNorm = 0.2075, lr_0 = 3.8865e-04
Loss = 1.4155e-02, PNorm = 172.9388, GNorm = 0.2503, lr_0 = 3.8838e-04
Loss = 1.6758e-02, PNorm = 172.9595, GNorm = 0.2540, lr_0 = 3.8811e-04
Loss = 1.6198e-02, PNorm = 172.9766, GNorm = 0.3583, lr_0 = 3.8785e-04
Loss = 1.7659e-02, PNorm = 172.9954, GNorm = 0.6771, lr_0 = 3.8758e-04
Loss = 1.1785e-02, PNorm = 173.0116, GNorm = 0.2912, lr_0 = 3.8732e-04
Loss = 1.4409e-02, PNorm = 173.0302, GNorm = 0.3488, lr_0 = 3.8705e-04
Loss = 2.0957e-02, PNorm = 173.0491, GNorm = 0.3453, lr_0 = 3.8679e-04
Loss = 1.6823e-02, PNorm = 173.0677, GNorm = 0.2533, lr_0 = 3.8652e-04
Loss = 1.9701e-02, PNorm = 173.0853, GNorm = 0.2889, lr_0 = 3.8626e-04
Loss = 1.7641e-02, PNorm = 173.1011, GNorm = 0.2794, lr_0 = 3.8599e-04
Loss = 2.4203e-02, PNorm = 173.1211, GNorm = 0.5356, lr_0 = 3.8573e-04
Loss = 1.8374e-02, PNorm = 173.1426, GNorm = 0.3964, lr_0 = 3.8546e-04
Loss = 1.7694e-02, PNorm = 173.1656, GNorm = 0.3136, lr_0 = 3.8520e-04
Loss = 1.4497e-02, PNorm = 173.1860, GNorm = 0.1622, lr_0 = 3.8493e-04
Loss = 1.8064e-02, PNorm = 173.2050, GNorm = 0.3413, lr_0 = 3.8467e-04
Loss = 1.6344e-02, PNorm = 173.2228, GNorm = 0.2902, lr_0 = 3.8441e-04
Loss = 1.4165e-02, PNorm = 173.2405, GNorm = 0.2411, lr_0 = 3.8414e-04
Loss = 1.4636e-02, PNorm = 173.2601, GNorm = 0.2326, lr_0 = 3.8388e-04
Loss = 1.6176e-02, PNorm = 173.2798, GNorm = 0.1670, lr_0 = 3.8362e-04
Loss = 1.6660e-02, PNorm = 173.2950, GNorm = 0.6441, lr_0 = 3.8336e-04
Loss = 1.6929e-02, PNorm = 173.3104, GNorm = 0.7982, lr_0 = 3.8309e-04
Loss = 1.7655e-02, PNorm = 173.3312, GNorm = 0.5191, lr_0 = 3.8283e-04
Loss = 1.3918e-02, PNorm = 173.3529, GNorm = 0.2692, lr_0 = 3.8257e-04
Loss = 1.5084e-02, PNorm = 173.3716, GNorm = 0.3061, lr_0 = 3.8231e-04
Loss = 1.7553e-02, PNorm = 173.3865, GNorm = 0.4405, lr_0 = 3.8204e-04
Loss = 1.4983e-02, PNorm = 173.4030, GNorm = 0.2206, lr_0 = 3.8178e-04
Loss = 1.8203e-02, PNorm = 173.4234, GNorm = 0.1467, lr_0 = 3.8152e-04
Loss = 1.4790e-02, PNorm = 173.4441, GNorm = 0.2044, lr_0 = 3.8126e-04
Loss = 1.9223e-02, PNorm = 173.4628, GNorm = 0.2682, lr_0 = 3.8100e-04
Loss = 2.0544e-02, PNorm = 173.4830, GNorm = 0.3963, lr_0 = 3.8074e-04
Loss = 1.6045e-02, PNorm = 173.5024, GNorm = 0.5723, lr_0 = 3.8048e-04
Loss = 1.6497e-02, PNorm = 173.5194, GNorm = 0.2321, lr_0 = 3.8022e-04
Loss = 1.6034e-02, PNorm = 173.5369, GNorm = 0.2237, lr_0 = 3.7995e-04
Loss = 1.6822e-02, PNorm = 173.5529, GNorm = 0.3601, lr_0 = 3.7969e-04
Loss = 2.1145e-02, PNorm = 173.5780, GNorm = 0.2230, lr_0 = 3.7943e-04
Loss = 1.2401e-02, PNorm = 173.6012, GNorm = 0.2610, lr_0 = 3.7917e-04
Loss = 1.3872e-02, PNorm = 173.6230, GNorm = 0.3529, lr_0 = 3.7891e-04
Loss = 1.1843e-02, PNorm = 173.6453, GNorm = 0.2366, lr_0 = 3.7866e-04
Loss = 1.3282e-02, PNorm = 173.6673, GNorm = 0.3783, lr_0 = 3.7840e-04
Loss = 1.7493e-02, PNorm = 173.6880, GNorm = 0.3207, lr_0 = 3.7814e-04
Loss = 1.6108e-02, PNorm = 173.7080, GNorm = 1.0111, lr_0 = 3.7788e-04
Loss = 1.9152e-02, PNorm = 173.7285, GNorm = 0.5110, lr_0 = 3.7762e-04
Loss = 1.5238e-02, PNorm = 173.7525, GNorm = 0.3672, lr_0 = 3.7736e-04
Loss = 1.7939e-02, PNorm = 173.7756, GNorm = 0.9776, lr_0 = 3.7710e-04
Loss = 2.2851e-02, PNorm = 173.7899, GNorm = 0.2300, lr_0 = 3.7684e-04
Loss = 3.6190e-02, PNorm = 173.8161, GNorm = 0.4739, lr_0 = 3.7659e-04
Loss = 1.6628e-02, PNorm = 173.8421, GNorm = 0.8740, lr_0 = 3.7633e-04
Loss = 2.4246e-02, PNorm = 173.8691, GNorm = 0.5511, lr_0 = 3.7607e-04
Loss = 1.6778e-02, PNorm = 173.8963, GNorm = 0.3650, lr_0 = 3.7581e-04
Loss = 1.4144e-02, PNorm = 173.9197, GNorm = 0.1845, lr_0 = 3.7555e-04
Loss = 1.5610e-02, PNorm = 173.9394, GNorm = 0.1546, lr_0 = 3.7530e-04
Loss = 1.4211e-02, PNorm = 173.9606, GNorm = 0.2116, lr_0 = 3.7504e-04
Loss = 1.9855e-02, PNorm = 173.9812, GNorm = 0.1854, lr_0 = 3.7478e-04
Loss = 1.5178e-02, PNorm = 174.0034, GNorm = 0.1653, lr_0 = 3.7453e-04
Loss = 1.4122e-02, PNorm = 174.0271, GNorm = 0.3960, lr_0 = 3.7427e-04
Loss = 1.5599e-02, PNorm = 174.0485, GNorm = 0.1856, lr_0 = 3.7401e-04
Loss = 1.3613e-02, PNorm = 174.0687, GNorm = 0.2496, lr_0 = 3.7376e-04
Loss = 1.7837e-02, PNorm = 174.0881, GNorm = 0.2702, lr_0 = 3.7350e-04
Loss = 1.5619e-02, PNorm = 174.1091, GNorm = 0.2736, lr_0 = 3.7325e-04
Loss = 2.5023e-02, PNorm = 174.1288, GNorm = 0.2363, lr_0 = 3.7299e-04
Loss = 1.9380e-02, PNorm = 174.1512, GNorm = 0.2907, lr_0 = 3.7273e-04
Validation mae = 0.121495
Epoch 14
Loss = 1.6881e-02, PNorm = 174.1697, GNorm = 0.6658, lr_0 = 3.7248e-04
Loss = 1.6984e-02, PNorm = 174.1837, GNorm = 0.1899, lr_0 = 3.7222e-04
Loss = 1.9600e-02, PNorm = 174.1995, GNorm = 0.6870, lr_0 = 3.7197e-04
Loss = 1.4891e-02, PNorm = 174.2129, GNorm = 0.4280, lr_0 = 3.7171e-04
Loss = 1.3367e-02, PNorm = 174.2304, GNorm = 1.2574, lr_0 = 3.7146e-04
Loss = 1.7513e-02, PNorm = 174.2439, GNorm = 0.4485, lr_0 = 3.7120e-04
Loss = 1.6512e-02, PNorm = 174.2599, GNorm = 0.2617, lr_0 = 3.7095e-04
Loss = 1.6231e-02, PNorm = 174.2733, GNorm = 0.3269, lr_0 = 3.7070e-04
Loss = 2.1010e-02, PNorm = 174.2883, GNorm = 0.1732, lr_0 = 3.7044e-04
Loss = 1.7698e-02, PNorm = 174.3041, GNorm = 0.2379, lr_0 = 3.7019e-04
Loss = 1.4584e-02, PNorm = 174.3171, GNorm = 0.2508, lr_0 = 3.6993e-04
Loss = 1.7119e-02, PNorm = 174.3287, GNorm = 0.2482, lr_0 = 3.6968e-04
Loss = 1.7052e-02, PNorm = 174.3427, GNorm = 0.2994, lr_0 = 3.6943e-04
Loss = 1.2362e-02, PNorm = 174.3568, GNorm = 0.3900, lr_0 = 3.6917e-04
Loss = 1.7579e-02, PNorm = 174.3708, GNorm = 0.3026, lr_0 = 3.6892e-04
Loss = 1.3747e-02, PNorm = 174.3879, GNorm = 0.5346, lr_0 = 3.6867e-04
Loss = 1.4494e-02, PNorm = 174.4060, GNorm = 0.1696, lr_0 = 3.6842e-04
Loss = 1.1416e-02, PNorm = 174.4220, GNorm = 0.1765, lr_0 = 3.6816e-04
Loss = 1.5906e-02, PNorm = 174.4369, GNorm = 0.3798, lr_0 = 3.6791e-04
Loss = 2.0921e-02, PNorm = 174.4530, GNorm = 0.1992, lr_0 = 3.6766e-04
Loss = 1.5405e-02, PNorm = 174.4684, GNorm = 0.2512, lr_0 = 3.6741e-04
Loss = 1.2674e-02, PNorm = 174.4856, GNorm = 0.1748, lr_0 = 3.6716e-04
Loss = 1.2604e-02, PNorm = 174.4973, GNorm = 0.1771, lr_0 = 3.6690e-04
Loss = 9.9120e-03, PNorm = 174.5082, GNorm = 0.1997, lr_0 = 3.6665e-04
Loss = 1.8578e-02, PNorm = 174.5207, GNorm = 0.4245, lr_0 = 3.6640e-04
Loss = 1.4331e-02, PNorm = 174.5348, GNorm = 0.1667, lr_0 = 3.6615e-04
Loss = 1.3401e-02, PNorm = 174.5462, GNorm = 0.2775, lr_0 = 3.6590e-04
Loss = 1.1997e-02, PNorm = 174.5574, GNorm = 0.2529, lr_0 = 3.6565e-04
Loss = 1.3751e-02, PNorm = 174.5718, GNorm = 0.2090, lr_0 = 3.6540e-04
Loss = 1.7342e-02, PNorm = 174.5856, GNorm = 0.1420, lr_0 = 3.6515e-04
Loss = 9.2588e-03, PNorm = 174.5988, GNorm = 0.2368, lr_0 = 3.6490e-04
Loss = 1.1535e-02, PNorm = 174.6087, GNorm = 0.2417, lr_0 = 3.6465e-04
Loss = 1.3208e-02, PNorm = 174.6222, GNorm = 0.1508, lr_0 = 3.6440e-04
Loss = 1.1264e-02, PNorm = 174.6362, GNorm = 0.2005, lr_0 = 3.6415e-04
Loss = 1.0305e-02, PNorm = 174.6512, GNorm = 0.3921, lr_0 = 3.6390e-04
Loss = 1.2807e-02, PNorm = 174.6632, GNorm = 0.2017, lr_0 = 3.6365e-04
Loss = 1.7714e-02, PNorm = 174.6745, GNorm = 0.3453, lr_0 = 3.6340e-04
Loss = 1.4058e-02, PNorm = 174.6895, GNorm = 0.3714, lr_0 = 3.6315e-04
Loss = 1.4132e-02, PNorm = 174.7045, GNorm = 0.3056, lr_0 = 3.6290e-04
Loss = 1.1251e-02, PNorm = 174.7184, GNorm = 0.2748, lr_0 = 3.6266e-04
Loss = 1.3491e-02, PNorm = 174.7329, GNorm = 0.3654, lr_0 = 3.6241e-04
Loss = 1.2660e-02, PNorm = 174.7490, GNorm = 0.2891, lr_0 = 3.6216e-04
Loss = 1.2458e-02, PNorm = 174.7657, GNorm = 0.2990, lr_0 = 3.6191e-04
Loss = 1.3098e-02, PNorm = 174.7812, GNorm = 0.1591, lr_0 = 3.6166e-04
Loss = 1.4550e-02, PNorm = 174.7979, GNorm = 0.3903, lr_0 = 3.6141e-04
Loss = 1.4191e-02, PNorm = 174.8135, GNorm = 0.2698, lr_0 = 3.6117e-04
Loss = 1.2269e-02, PNorm = 174.8277, GNorm = 0.1158, lr_0 = 3.6092e-04
Loss = 1.1990e-02, PNorm = 174.8429, GNorm = 0.1994, lr_0 = 3.6067e-04
Loss = 1.3336e-02, PNorm = 174.8582, GNorm = 0.1895, lr_0 = 3.6043e-04
Loss = 1.7940e-02, PNorm = 174.8743, GNorm = 0.3365, lr_0 = 3.6018e-04
Loss = 1.2838e-02, PNorm = 174.8916, GNorm = 0.1830, lr_0 = 3.5993e-04
Loss = 1.1793e-02, PNorm = 174.9092, GNorm = 0.1367, lr_0 = 3.5969e-04
Loss = 1.6010e-02, PNorm = 174.9236, GNorm = 1.2270, lr_0 = 3.5944e-04
Loss = 1.4569e-02, PNorm = 174.9373, GNorm = 0.3169, lr_0 = 3.5919e-04
Loss = 1.5687e-02, PNorm = 174.9534, GNorm = 0.2817, lr_0 = 3.5895e-04
Loss = 1.5971e-02, PNorm = 174.9661, GNorm = 0.2620, lr_0 = 3.5870e-04
Loss = 1.1392e-02, PNorm = 174.9814, GNorm = 0.1491, lr_0 = 3.5845e-04
Loss = 1.2054e-02, PNorm = 174.9960, GNorm = 0.1394, lr_0 = 3.5821e-04
Loss = 2.1889e-02, PNorm = 175.0142, GNorm = 0.2372, lr_0 = 3.5796e-04
Loss = 2.0293e-02, PNorm = 175.0279, GNorm = 0.1477, lr_0 = 3.5772e-04
Loss = 1.3147e-02, PNorm = 175.0415, GNorm = 0.1773, lr_0 = 3.5747e-04
Loss = 1.1761e-02, PNorm = 175.0578, GNorm = 0.2578, lr_0 = 3.5723e-04
Loss = 1.2341e-02, PNorm = 175.0758, GNorm = 0.1498, lr_0 = 3.5698e-04
Loss = 1.0747e-02, PNorm = 175.0930, GNorm = 0.2518, lr_0 = 3.5674e-04
Loss = 1.4426e-02, PNorm = 175.1066, GNorm = 0.2359, lr_0 = 3.5650e-04
Loss = 1.7026e-02, PNorm = 175.1220, GNorm = 0.7402, lr_0 = 3.5625e-04
Loss = 1.5956e-02, PNorm = 175.1396, GNorm = 0.5534, lr_0 = 3.5601e-04
Loss = 1.2158e-02, PNorm = 175.1554, GNorm = 0.2289, lr_0 = 3.5576e-04
Loss = 1.6247e-02, PNorm = 175.1698, GNorm = 0.2381, lr_0 = 3.5552e-04
Loss = 1.6931e-02, PNorm = 175.1846, GNorm = 0.1711, lr_0 = 3.5528e-04
Loss = 2.2286e-02, PNorm = 175.1987, GNorm = 0.2828, lr_0 = 3.5503e-04
Loss = 1.5347e-02, PNorm = 175.2152, GNorm = 0.2498, lr_0 = 3.5479e-04
Loss = 1.5869e-02, PNorm = 175.2340, GNorm = 0.1726, lr_0 = 3.5455e-04
Loss = 1.5774e-02, PNorm = 175.2505, GNorm = 0.2350, lr_0 = 3.5430e-04
Loss = 1.7141e-02, PNorm = 175.2687, GNorm = 0.2415, lr_0 = 3.5406e-04
Loss = 1.1403e-02, PNorm = 175.2843, GNorm = 0.3154, lr_0 = 3.5382e-04
Loss = 1.1483e-02, PNorm = 175.2993, GNorm = 0.1495, lr_0 = 3.5358e-04
Loss = 1.9016e-02, PNorm = 175.3158, GNorm = 0.1941, lr_0 = 3.5333e-04
Loss = 1.5441e-02, PNorm = 175.3325, GNorm = 0.3094, lr_0 = 3.5309e-04
Loss = 1.1834e-02, PNorm = 175.3493, GNorm = 0.4228, lr_0 = 3.5285e-04
Loss = 1.5199e-02, PNorm = 175.3638, GNorm = 0.1460, lr_0 = 3.5261e-04
Loss = 1.3161e-02, PNorm = 175.3801, GNorm = 0.1686, lr_0 = 3.5237e-04
Loss = 1.2601e-02, PNorm = 175.3972, GNorm = 0.2517, lr_0 = 3.5212e-04
Loss = 1.1121e-02, PNorm = 175.4148, GNorm = 0.1218, lr_0 = 3.5188e-04
Loss = 1.6998e-02, PNorm = 175.4323, GNorm = 1.1028, lr_0 = 3.5164e-04
Loss = 1.5277e-02, PNorm = 175.4525, GNorm = 0.4726, lr_0 = 3.5140e-04
Loss = 1.2698e-02, PNorm = 175.4695, GNorm = 0.2505, lr_0 = 3.5116e-04
Loss = 9.3012e-03, PNorm = 175.4838, GNorm = 0.2376, lr_0 = 3.5092e-04
Loss = 1.7367e-02, PNorm = 175.4984, GNorm = 0.3162, lr_0 = 3.5068e-04
Loss = 1.3325e-02, PNorm = 175.5188, GNorm = 0.1578, lr_0 = 3.5044e-04
Loss = 1.4721e-02, PNorm = 175.5382, GNorm = 0.2539, lr_0 = 3.5020e-04
Loss = 1.3269e-02, PNorm = 175.5559, GNorm = 0.3339, lr_0 = 3.4996e-04
Loss = 1.1272e-02, PNorm = 175.5726, GNorm = 0.5582, lr_0 = 3.4972e-04
Loss = 1.2286e-02, PNorm = 175.5892, GNorm = 0.2110, lr_0 = 3.4948e-04
Loss = 2.3622e-02, PNorm = 175.6065, GNorm = 0.3495, lr_0 = 3.4924e-04
Loss = 1.2774e-02, PNorm = 175.6260, GNorm = 0.3379, lr_0 = 3.4900e-04
Loss = 1.3334e-02, PNorm = 175.6450, GNorm = 0.4721, lr_0 = 3.4876e-04
Loss = 1.1030e-02, PNorm = 175.6621, GNorm = 0.2104, lr_0 = 3.4852e-04
Loss = 1.4537e-02, PNorm = 175.6801, GNorm = 0.6799, lr_0 = 3.4828e-04
Loss = 1.4250e-02, PNorm = 175.6975, GNorm = 0.3113, lr_0 = 3.4805e-04
Loss = 1.3637e-02, PNorm = 175.7135, GNorm = 0.2927, lr_0 = 3.4781e-04
Loss = 1.2975e-02, PNorm = 175.7311, GNorm = 0.3407, lr_0 = 3.4757e-04
Loss = 1.0463e-02, PNorm = 175.7469, GNorm = 0.1295, lr_0 = 3.4733e-04
Loss = 1.8065e-02, PNorm = 175.7639, GNorm = 0.6210, lr_0 = 3.4709e-04
Loss = 2.0062e-02, PNorm = 175.7781, GNorm = 0.4368, lr_0 = 3.4686e-04
Loss = 1.7071e-02, PNorm = 175.7942, GNorm = 0.5118, lr_0 = 3.4662e-04
Loss = 1.6427e-02, PNorm = 175.8049, GNorm = 0.2165, lr_0 = 3.4638e-04
Loss = 1.1801e-02, PNorm = 175.8215, GNorm = 0.3848, lr_0 = 3.4614e-04
Loss = 1.2551e-02, PNorm = 175.8410, GNorm = 0.2777, lr_0 = 3.4591e-04
Loss = 1.2750e-02, PNorm = 175.8576, GNorm = 0.2763, lr_0 = 3.4567e-04
Loss = 1.2363e-02, PNorm = 175.8755, GNorm = 0.1729, lr_0 = 3.4543e-04
Loss = 1.0472e-02, PNorm = 175.8922, GNorm = 0.5165, lr_0 = 3.4520e-04
Loss = 1.3105e-02, PNorm = 175.9117, GNorm = 0.1740, lr_0 = 3.4496e-04
Loss = 1.2417e-02, PNorm = 175.9316, GNorm = 0.2735, lr_0 = 3.4472e-04
Loss = 1.2162e-02, PNorm = 175.9525, GNorm = 0.3379, lr_0 = 3.4449e-04
Loss = 1.5767e-02, PNorm = 175.9695, GNorm = 0.3722, lr_0 = 3.4425e-04
Loss = 1.1603e-02, PNorm = 175.9879, GNorm = 0.3443, lr_0 = 3.4402e-04
Loss = 1.3016e-02, PNorm = 176.0041, GNorm = 0.2865, lr_0 = 3.4378e-04
Loss = 1.2419e-02, PNorm = 176.0219, GNorm = 0.3854, lr_0 = 3.4354e-04
Loss = 1.1198e-02, PNorm = 176.0409, GNorm = 0.3240, lr_0 = 3.4331e-04
Validation mae = 0.121724
Epoch 15
Loss = 1.1767e-02, PNorm = 176.0529, GNorm = 0.1621, lr_0 = 3.4307e-04
Loss = 1.4195e-02, PNorm = 176.0641, GNorm = 0.6050, lr_0 = 3.4284e-04
Loss = 1.5255e-02, PNorm = 176.0783, GNorm = 0.1895, lr_0 = 3.4260e-04
Loss = 1.3722e-02, PNorm = 176.0912, GNorm = 0.4010, lr_0 = 3.4237e-04
Loss = 1.0932e-02, PNorm = 176.1025, GNorm = 0.2286, lr_0 = 3.4213e-04
Loss = 1.1033e-02, PNorm = 176.1137, GNorm = 0.3488, lr_0 = 3.4190e-04
Loss = 1.2965e-02, PNorm = 176.1270, GNorm = 0.1252, lr_0 = 3.4167e-04
Loss = 1.4827e-02, PNorm = 176.1436, GNorm = 0.1459, lr_0 = 3.4143e-04
Loss = 1.0177e-02, PNorm = 176.1581, GNorm = 0.2875, lr_0 = 3.4120e-04
Loss = 1.2901e-02, PNorm = 176.1700, GNorm = 0.4161, lr_0 = 3.4096e-04
Loss = 1.0561e-02, PNorm = 176.1800, GNorm = 0.2877, lr_0 = 3.4073e-04
Loss = 1.1630e-02, PNorm = 176.1931, GNorm = 0.1915, lr_0 = 3.4050e-04
Loss = 1.2812e-02, PNorm = 176.2046, GNorm = 0.5233, lr_0 = 3.4026e-04
Loss = 1.3561e-02, PNorm = 176.2163, GNorm = 0.3288, lr_0 = 3.4003e-04
Loss = 1.2150e-02, PNorm = 176.2280, GNorm = 0.1697, lr_0 = 3.3980e-04
Loss = 9.3271e-03, PNorm = 176.2411, GNorm = 0.3771, lr_0 = 3.3956e-04
Loss = 1.0297e-02, PNorm = 176.2529, GNorm = 0.2252, lr_0 = 3.3933e-04
Loss = 9.9378e-03, PNorm = 176.2648, GNorm = 0.1683, lr_0 = 3.3910e-04
Loss = 1.2191e-02, PNorm = 176.2774, GNorm = 0.3021, lr_0 = 3.3887e-04
Loss = 1.6859e-02, PNorm = 176.2873, GNorm = 0.3312, lr_0 = 3.3864e-04
Loss = 1.3681e-02, PNorm = 176.2989, GNorm = 0.2908, lr_0 = 3.3840e-04
Loss = 1.0294e-02, PNorm = 176.3114, GNorm = 0.3659, lr_0 = 3.3817e-04
Loss = 1.1644e-02, PNorm = 176.3246, GNorm = 0.3595, lr_0 = 3.3794e-04
Loss = 9.0700e-03, PNorm = 176.3379, GNorm = 0.2096, lr_0 = 3.3771e-04
Loss = 1.3421e-02, PNorm = 176.3497, GNorm = 0.2283, lr_0 = 3.3748e-04
Loss = 1.0882e-02, PNorm = 176.3603, GNorm = 0.2452, lr_0 = 3.3725e-04
Loss = 1.1258e-02, PNorm = 176.3744, GNorm = 0.3359, lr_0 = 3.3701e-04
Loss = 1.1354e-02, PNorm = 176.3887, GNorm = 0.3502, lr_0 = 3.3678e-04
Loss = 1.0509e-02, PNorm = 176.4003, GNorm = 0.1155, lr_0 = 3.3655e-04
Loss = 1.0542e-02, PNorm = 176.4114, GNorm = 0.1378, lr_0 = 3.3632e-04
Loss = 1.0270e-02, PNorm = 176.4227, GNorm = 0.4671, lr_0 = 3.3609e-04
Loss = 1.3130e-02, PNorm = 176.4353, GNorm = 0.2917, lr_0 = 3.3586e-04
Loss = 1.6714e-02, PNorm = 176.4473, GNorm = 0.2404, lr_0 = 3.3563e-04
Loss = 8.9242e-03, PNorm = 176.4581, GNorm = 0.1665, lr_0 = 3.3540e-04
Loss = 1.5084e-02, PNorm = 176.4696, GNorm = 0.6624, lr_0 = 3.3517e-04
Loss = 9.0228e-03, PNorm = 176.4826, GNorm = 0.4478, lr_0 = 3.3494e-04
Loss = 1.0931e-02, PNorm = 176.4957, GNorm = 0.3170, lr_0 = 3.3471e-04
Loss = 1.2954e-02, PNorm = 176.5096, GNorm = 0.1965, lr_0 = 3.3448e-04
Loss = 1.1609e-02, PNorm = 176.5229, GNorm = 0.5082, lr_0 = 3.3425e-04
Loss = 8.6755e-03, PNorm = 176.5351, GNorm = 0.0917, lr_0 = 3.3403e-04
Loss = 8.0173e-03, PNorm = 176.5490, GNorm = 0.1514, lr_0 = 3.3380e-04
Loss = 1.1457e-02, PNorm = 176.5618, GNorm = 0.1245, lr_0 = 3.3357e-04
Loss = 1.4777e-02, PNorm = 176.5746, GNorm = 0.8219, lr_0 = 3.3334e-04
Loss = 1.7508e-02, PNorm = 176.5937, GNorm = 0.2134, lr_0 = 3.3311e-04
Loss = 1.6055e-02, PNorm = 176.6096, GNorm = 0.1953, lr_0 = 3.3288e-04
Loss = 1.1414e-02, PNorm = 176.6259, GNorm = 0.3553, lr_0 = 3.3265e-04
Loss = 1.2518e-02, PNorm = 176.6390, GNorm = 0.4405, lr_0 = 3.3243e-04
Loss = 1.3099e-02, PNorm = 176.6495, GNorm = 0.3054, lr_0 = 3.3220e-04
Loss = 1.5349e-02, PNorm = 176.6612, GNorm = 0.1750, lr_0 = 3.3197e-04
Loss = 9.7686e-03, PNorm = 176.6727, GNorm = 0.3367, lr_0 = 3.3174e-04
Loss = 1.2324e-02, PNorm = 176.6861, GNorm = 0.2243, lr_0 = 3.3152e-04
Loss = 1.0170e-02, PNorm = 176.6991, GNorm = 0.2412, lr_0 = 3.3129e-04
Loss = 9.9853e-03, PNorm = 176.7142, GNorm = 0.2690, lr_0 = 3.3106e-04
Loss = 1.1022e-02, PNorm = 176.7260, GNorm = 0.2491, lr_0 = 3.3084e-04
Loss = 1.0014e-02, PNorm = 176.7403, GNorm = 0.3033, lr_0 = 3.3061e-04
Loss = 1.2643e-02, PNorm = 176.7536, GNorm = 0.3330, lr_0 = 3.3038e-04
Loss = 1.2797e-02, PNorm = 176.7645, GNorm = 0.1918, lr_0 = 3.3016e-04
Loss = 1.5825e-02, PNorm = 176.7760, GNorm = 0.1170, lr_0 = 3.2993e-04
Loss = 9.5178e-03, PNorm = 176.7894, GNorm = 0.2290, lr_0 = 3.2970e-04
Loss = 1.5054e-02, PNorm = 176.8018, GNorm = 0.3783, lr_0 = 3.2948e-04
Loss = 1.3467e-02, PNorm = 176.8160, GNorm = 0.1970, lr_0 = 3.2925e-04
Loss = 1.1110e-02, PNorm = 176.8298, GNorm = 0.1639, lr_0 = 3.2903e-04
Loss = 9.0288e-03, PNorm = 176.8433, GNorm = 0.1914, lr_0 = 3.2880e-04
Loss = 1.3095e-02, PNorm = 176.8571, GNorm = 0.1051, lr_0 = 3.2858e-04
Loss = 1.2657e-02, PNorm = 176.8708, GNorm = 0.5480, lr_0 = 3.2835e-04
Loss = 1.1525e-02, PNorm = 176.8844, GNorm = 0.1404, lr_0 = 3.2813e-04
Loss = 9.4770e-03, PNorm = 176.8983, GNorm = 0.6349, lr_0 = 3.2790e-04
Loss = 8.8818e-03, PNorm = 176.9113, GNorm = 0.1600, lr_0 = 3.2768e-04
Loss = 1.2471e-02, PNorm = 176.9222, GNorm = 0.2563, lr_0 = 3.2745e-04
Loss = 1.4853e-02, PNorm = 176.9345, GNorm = 0.2368, lr_0 = 3.2723e-04
Loss = 1.1228e-02, PNorm = 176.9473, GNorm = 0.5283, lr_0 = 3.2700e-04
Loss = 1.1988e-02, PNorm = 176.9597, GNorm = 0.1803, lr_0 = 3.2678e-04
Loss = 9.1598e-03, PNorm = 176.9730, GNorm = 0.5777, lr_0 = 3.2656e-04
Loss = 1.0368e-02, PNorm = 176.9876, GNorm = 0.1427, lr_0 = 3.2633e-04
Loss = 1.3666e-02, PNorm = 177.0027, GNorm = 0.1606, lr_0 = 3.2611e-04
Loss = 9.0392e-03, PNorm = 177.0180, GNorm = 0.1706, lr_0 = 3.2589e-04
Loss = 1.3111e-02, PNorm = 177.0332, GNorm = 0.2071, lr_0 = 3.2566e-04
Loss = 1.7578e-02, PNorm = 177.0460, GNorm = 0.3162, lr_0 = 3.2544e-04
Loss = 1.3616e-02, PNorm = 177.0582, GNorm = 0.1498, lr_0 = 3.2522e-04
Loss = 1.2164e-02, PNorm = 177.0711, GNorm = 0.1028, lr_0 = 3.2499e-04
Loss = 1.2339e-02, PNorm = 177.0860, GNorm = 0.1923, lr_0 = 3.2477e-04
Loss = 9.2265e-03, PNorm = 177.1014, GNorm = 0.4903, lr_0 = 3.2455e-04
Loss = 9.8906e-03, PNorm = 177.1124, GNorm = 0.4015, lr_0 = 3.2433e-04
Loss = 1.5090e-02, PNorm = 177.1230, GNorm = 0.2178, lr_0 = 3.2410e-04
Loss = 9.3918e-03, PNorm = 177.1329, GNorm = 0.3007, lr_0 = 3.2388e-04
Loss = 1.3923e-02, PNorm = 177.1448, GNorm = 0.4000, lr_0 = 3.2366e-04
Loss = 1.2727e-02, PNorm = 177.1571, GNorm = 0.4399, lr_0 = 3.2344e-04
Loss = 1.1431e-02, PNorm = 177.1722, GNorm = 0.2623, lr_0 = 3.2322e-04
Loss = 1.0816e-02, PNorm = 177.1850, GNorm = 0.2921, lr_0 = 3.2300e-04
Loss = 1.6600e-02, PNorm = 177.2024, GNorm = 0.8310, lr_0 = 3.2277e-04
Loss = 9.3222e-03, PNorm = 177.2206, GNorm = 0.1354, lr_0 = 3.2255e-04
Loss = 1.2389e-02, PNorm = 177.2382, GNorm = 0.8096, lr_0 = 3.2233e-04
Loss = 1.4428e-02, PNorm = 177.2532, GNorm = 0.1478, lr_0 = 3.2211e-04
Loss = 1.5466e-02, PNorm = 177.2717, GNorm = 0.2321, lr_0 = 3.2189e-04
Loss = 1.1738e-02, PNorm = 177.2851, GNorm = 0.2233, lr_0 = 3.2167e-04
Loss = 1.0282e-02, PNorm = 177.2982, GNorm = 0.1593, lr_0 = 3.2145e-04
Loss = 8.9657e-03, PNorm = 177.3094, GNorm = 0.3699, lr_0 = 3.2123e-04
Loss = 1.2033e-02, PNorm = 177.3222, GNorm = 0.3042, lr_0 = 3.2101e-04
Loss = 1.1932e-02, PNorm = 177.3350, GNorm = 0.2214, lr_0 = 3.2079e-04
Loss = 1.3152e-02, PNorm = 177.3488, GNorm = 0.2762, lr_0 = 3.2057e-04
Loss = 1.2585e-02, PNorm = 177.3634, GNorm = 0.3156, lr_0 = 3.2035e-04
Loss = 1.3253e-02, PNorm = 177.3812, GNorm = 0.3105, lr_0 = 3.2013e-04
Loss = 9.8432e-03, PNorm = 177.3941, GNorm = 0.3038, lr_0 = 3.1991e-04
Loss = 1.0559e-02, PNorm = 177.4092, GNorm = 0.1698, lr_0 = 3.1969e-04
Loss = 1.7478e-02, PNorm = 177.4212, GNorm = 0.1273, lr_0 = 3.1947e-04
Loss = 1.2547e-02, PNorm = 177.4350, GNorm = 0.4484, lr_0 = 3.1925e-04
Loss = 1.4832e-02, PNorm = 177.4529, GNorm = 0.3323, lr_0 = 3.1904e-04
Loss = 1.8767e-02, PNorm = 177.4679, GNorm = 0.2090, lr_0 = 3.1882e-04
Loss = 1.2784e-02, PNorm = 177.4869, GNorm = 0.3447, lr_0 = 3.1860e-04
Loss = 1.2445e-02, PNorm = 177.5020, GNorm = 0.2727, lr_0 = 3.1838e-04
Loss = 1.7396e-02, PNorm = 177.5159, GNorm = 0.4434, lr_0 = 3.1816e-04
Loss = 1.1815e-02, PNorm = 177.5330, GNorm = 0.2521, lr_0 = 3.1794e-04
Loss = 2.8041e-02, PNorm = 177.5494, GNorm = 0.3177, lr_0 = 3.1773e-04
Loss = 1.2709e-02, PNorm = 177.5629, GNorm = 0.6299, lr_0 = 3.1751e-04
Loss = 1.2037e-02, PNorm = 177.5771, GNorm = 0.1999, lr_0 = 3.1729e-04
Loss = 1.1908e-02, PNorm = 177.5928, GNorm = 0.2927, lr_0 = 3.1707e-04
Loss = 1.0868e-02, PNorm = 177.6073, GNorm = 0.2236, lr_0 = 3.1686e-04
Loss = 1.0751e-02, PNorm = 177.6233, GNorm = 0.2440, lr_0 = 3.1664e-04
Loss = 1.2888e-02, PNorm = 177.6374, GNorm = 0.3039, lr_0 = 3.1642e-04
Loss = 1.5238e-02, PNorm = 177.6481, GNorm = 0.1474, lr_0 = 3.1621e-04
Validation mae = 0.121302
Epoch 16
Loss = 1.3610e-02, PNorm = 177.6612, GNorm = 0.1756, lr_0 = 3.1599e-04
Loss = 1.1401e-02, PNorm = 177.6707, GNorm = 0.6544, lr_0 = 3.1577e-04
Loss = 9.6659e-03, PNorm = 177.6806, GNorm = 0.2626, lr_0 = 3.1556e-04
Loss = 9.7354e-03, PNorm = 177.6911, GNorm = 0.2044, lr_0 = 3.1534e-04
Loss = 1.1822e-02, PNorm = 177.7026, GNorm = 0.2930, lr_0 = 3.1512e-04
Loss = 8.4239e-03, PNorm = 177.7159, GNorm = 0.1282, lr_0 = 3.1491e-04
Loss = 1.0814e-02, PNorm = 177.7278, GNorm = 0.5843, lr_0 = 3.1469e-04
Loss = 1.6398e-02, PNorm = 177.7394, GNorm = 0.2013, lr_0 = 3.1448e-04
Loss = 9.7745e-03, PNorm = 177.7458, GNorm = 0.3664, lr_0 = 3.1426e-04
Loss = 8.7718e-03, PNorm = 177.7571, GNorm = 0.1809, lr_0 = 3.1405e-04
Loss = 1.3853e-02, PNorm = 177.7652, GNorm = 0.4109, lr_0 = 3.1383e-04
Loss = 9.4192e-03, PNorm = 177.7718, GNorm = 0.2302, lr_0 = 3.1362e-04
Loss = 1.0867e-02, PNorm = 177.7821, GNorm = 0.3017, lr_0 = 3.1340e-04
Loss = 1.0534e-02, PNorm = 177.7926, GNorm = 0.2178, lr_0 = 3.1319e-04
Loss = 8.6448e-03, PNorm = 177.8030, GNorm = 0.1499, lr_0 = 3.1297e-04
Loss = 1.3109e-02, PNorm = 177.8094, GNorm = 0.3012, lr_0 = 3.1276e-04
Loss = 1.5091e-02, PNorm = 177.8201, GNorm = 0.4000, lr_0 = 3.1254e-04
Loss = 8.0981e-03, PNorm = 177.8290, GNorm = 0.1683, lr_0 = 3.1233e-04
Loss = 1.1182e-02, PNorm = 177.8405, GNorm = 0.2562, lr_0 = 3.1212e-04
Loss = 1.1154e-02, PNorm = 177.8512, GNorm = 0.1507, lr_0 = 3.1190e-04
Loss = 9.0341e-03, PNorm = 177.8611, GNorm = 0.3813, lr_0 = 3.1169e-04
Loss = 1.1617e-02, PNorm = 177.8743, GNorm = 0.2117, lr_0 = 3.1147e-04
Loss = 1.0528e-02, PNorm = 177.8864, GNorm = 0.2336, lr_0 = 3.1126e-04
Loss = 1.0541e-02, PNorm = 177.8988, GNorm = 0.2227, lr_0 = 3.1105e-04
Loss = 1.0238e-02, PNorm = 177.9098, GNorm = 0.3305, lr_0 = 3.1083e-04
Loss = 1.9001e-02, PNorm = 177.9196, GNorm = 0.2123, lr_0 = 3.1062e-04
Loss = 7.4793e-03, PNorm = 177.9313, GNorm = 0.1292, lr_0 = 3.1041e-04
Loss = 1.0233e-02, PNorm = 177.9405, GNorm = 0.1790, lr_0 = 3.1020e-04
Loss = 9.8336e-03, PNorm = 177.9485, GNorm = 0.2481, lr_0 = 3.0998e-04
Loss = 7.8009e-03, PNorm = 177.9581, GNorm = 0.3817, lr_0 = 3.0977e-04
Loss = 1.0371e-02, PNorm = 177.9666, GNorm = 0.2492, lr_0 = 3.0956e-04
Loss = 8.9777e-03, PNorm = 177.9758, GNorm = 0.3333, lr_0 = 3.0935e-04
Loss = 1.3873e-02, PNorm = 177.9842, GNorm = 0.2041, lr_0 = 3.0914e-04
Loss = 1.0519e-02, PNorm = 177.9946, GNorm = 0.1507, lr_0 = 3.0892e-04
Loss = 1.0115e-02, PNorm = 178.0095, GNorm = 0.1309, lr_0 = 3.0871e-04
Loss = 9.0519e-03, PNorm = 178.0215, GNorm = 0.2058, lr_0 = 3.0850e-04
Loss = 7.6206e-03, PNorm = 178.0288, GNorm = 0.3904, lr_0 = 3.0829e-04
Loss = 8.4173e-03, PNorm = 178.0378, GNorm = 0.1587, lr_0 = 3.0808e-04
Loss = 1.2841e-02, PNorm = 178.0494, GNorm = 0.1114, lr_0 = 3.0787e-04
Loss = 1.0402e-02, PNorm = 178.0598, GNorm = 0.2410, lr_0 = 3.0766e-04
Loss = 7.9327e-03, PNorm = 178.0701, GNorm = 0.1360, lr_0 = 3.0745e-04
Loss = 8.4113e-03, PNorm = 178.0787, GNorm = 0.0915, lr_0 = 3.0723e-04
Loss = 8.3949e-03, PNorm = 178.0860, GNorm = 0.2170, lr_0 = 3.0702e-04
Loss = 1.3122e-02, PNorm = 178.0945, GNorm = 0.2260, lr_0 = 3.0681e-04
Loss = 1.1051e-02, PNorm = 178.1048, GNorm = 0.2387, lr_0 = 3.0660e-04
Loss = 1.2423e-02, PNorm = 178.1194, GNorm = 0.2838, lr_0 = 3.0639e-04
Loss = 1.5730e-02, PNorm = 178.1325, GNorm = 0.3309, lr_0 = 3.0618e-04
Loss = 1.0631e-02, PNorm = 178.1398, GNorm = 0.1435, lr_0 = 3.0597e-04
Loss = 1.0597e-02, PNorm = 178.1499, GNorm = 0.1790, lr_0 = 3.0576e-04
Loss = 7.3520e-03, PNorm = 178.1589, GNorm = 0.4072, lr_0 = 3.0555e-04
Loss = 8.4982e-03, PNorm = 178.1662, GNorm = 0.1054, lr_0 = 3.0535e-04
Loss = 9.6736e-03, PNorm = 178.1753, GNorm = 0.1132, lr_0 = 3.0514e-04
Loss = 9.7526e-03, PNorm = 178.1855, GNorm = 0.2241, lr_0 = 3.0493e-04
Loss = 8.9492e-03, PNorm = 178.1983, GNorm = 0.2434, lr_0 = 3.0472e-04
Loss = 9.7540e-03, PNorm = 178.2106, GNorm = 0.2468, lr_0 = 3.0451e-04
Loss = 1.5076e-02, PNorm = 178.2221, GNorm = 0.1538, lr_0 = 3.0430e-04
Loss = 1.4977e-02, PNorm = 178.2346, GNorm = 0.3610, lr_0 = 3.0409e-04
Loss = 8.7651e-03, PNorm = 178.2475, GNorm = 0.3658, lr_0 = 3.0388e-04
Loss = 1.1502e-02, PNorm = 178.2561, GNorm = 0.4191, lr_0 = 3.0368e-04
Loss = 1.4036e-02, PNorm = 178.2665, GNorm = 0.4235, lr_0 = 3.0347e-04
Loss = 7.9527e-03, PNorm = 178.2785, GNorm = 0.1567, lr_0 = 3.0326e-04
Loss = 8.6212e-03, PNorm = 178.2908, GNorm = 0.2298, lr_0 = 3.0305e-04
Loss = 1.0106e-02, PNorm = 178.3024, GNorm = 0.6554, lr_0 = 3.0284e-04
Loss = 8.9794e-03, PNorm = 178.3121, GNorm = 0.1365, lr_0 = 3.0264e-04
Loss = 1.6491e-02, PNorm = 178.3255, GNorm = 0.2767, lr_0 = 3.0243e-04
Loss = 9.1317e-03, PNorm = 178.3392, GNorm = 0.1751, lr_0 = 3.0222e-04
Loss = 1.0826e-02, PNorm = 178.3552, GNorm = 0.0990, lr_0 = 3.0202e-04
Loss = 1.2312e-02, PNorm = 178.3677, GNorm = 0.2071, lr_0 = 3.0181e-04
Loss = 1.6302e-02, PNorm = 178.3776, GNorm = 1.4683, lr_0 = 3.0160e-04
Loss = 1.0806e-02, PNorm = 178.3912, GNorm = 0.1800, lr_0 = 3.0140e-04
Loss = 1.5219e-02, PNorm = 178.4064, GNorm = 0.2534, lr_0 = 3.0119e-04
Loss = 8.7738e-03, PNorm = 178.4184, GNorm = 0.1663, lr_0 = 3.0098e-04
Loss = 1.2445e-02, PNorm = 178.4279, GNorm = 0.3969, lr_0 = 3.0078e-04
Loss = 1.4516e-02, PNorm = 178.4341, GNorm = 0.8831, lr_0 = 3.0057e-04
Loss = 9.2691e-03, PNorm = 178.4420, GNorm = 0.1803, lr_0 = 3.0036e-04
Loss = 1.1388e-02, PNorm = 178.4538, GNorm = 0.2860, lr_0 = 3.0016e-04
Loss = 9.1983e-03, PNorm = 178.4677, GNorm = 0.5860, lr_0 = 2.9995e-04
Loss = 9.9632e-03, PNorm = 178.4822, GNorm = 0.1604, lr_0 = 2.9975e-04
Loss = 7.9179e-03, PNorm = 178.4949, GNorm = 0.1786, lr_0 = 2.9954e-04
Loss = 7.9041e-03, PNorm = 178.5063, GNorm = 0.1091, lr_0 = 2.9934e-04
Loss = 1.1646e-02, PNorm = 178.5176, GNorm = 0.2155, lr_0 = 2.9913e-04
Loss = 7.8373e-03, PNorm = 178.5273, GNorm = 0.1890, lr_0 = 2.9893e-04
Loss = 1.0683e-02, PNorm = 178.5400, GNorm = 0.1787, lr_0 = 2.9872e-04
Loss = 9.6928e-03, PNorm = 178.5505, GNorm = 0.3195, lr_0 = 2.9852e-04
Loss = 1.0823e-02, PNorm = 178.5619, GNorm = 0.1810, lr_0 = 2.9831e-04
Loss = 9.5181e-03, PNorm = 178.5725, GNorm = 0.3023, lr_0 = 2.9811e-04
Loss = 7.6826e-03, PNorm = 178.5808, GNorm = 0.2747, lr_0 = 2.9790e-04
Loss = 1.3773e-02, PNorm = 178.5905, GNorm = 0.1249, lr_0 = 2.9770e-04
Loss = 1.1030e-02, PNorm = 178.5983, GNorm = 0.2149, lr_0 = 2.9750e-04
Loss = 1.0225e-02, PNorm = 178.6099, GNorm = 0.2244, lr_0 = 2.9729e-04
Loss = 1.7993e-02, PNorm = 178.6224, GNorm = 0.1394, lr_0 = 2.9709e-04
Loss = 1.2515e-02, PNorm = 178.6342, GNorm = 0.2992, lr_0 = 2.9689e-04
Loss = 8.6076e-03, PNorm = 178.6440, GNorm = 0.1669, lr_0 = 2.9668e-04
Loss = 1.2372e-02, PNorm = 178.6577, GNorm = 0.2900, lr_0 = 2.9648e-04
Loss = 7.3081e-03, PNorm = 178.6673, GNorm = 0.1809, lr_0 = 2.9628e-04
Loss = 1.4581e-02, PNorm = 178.6747, GNorm = 0.2803, lr_0 = 2.9607e-04
Loss = 9.4922e-03, PNorm = 178.6845, GNorm = 0.2331, lr_0 = 2.9587e-04
Loss = 9.6871e-03, PNorm = 178.6944, GNorm = 0.1618, lr_0 = 2.9567e-04
Loss = 1.0658e-02, PNorm = 178.7078, GNorm = 0.2376, lr_0 = 2.9546e-04
Loss = 8.9935e-03, PNorm = 178.7239, GNorm = 0.2004, lr_0 = 2.9526e-04
Loss = 1.1263e-02, PNorm = 178.7378, GNorm = 0.1343, lr_0 = 2.9506e-04
Loss = 9.4004e-03, PNorm = 178.7516, GNorm = 0.3120, lr_0 = 2.9486e-04
Loss = 8.3547e-03, PNorm = 178.7637, GNorm = 0.2376, lr_0 = 2.9466e-04
Loss = 1.0119e-02, PNorm = 178.7739, GNorm = 0.1187, lr_0 = 2.9445e-04
Loss = 8.8720e-03, PNorm = 178.7856, GNorm = 0.3427, lr_0 = 2.9425e-04
Loss = 1.5373e-02, PNorm = 178.7947, GNorm = 0.7828, lr_0 = 2.9405e-04
Loss = 1.1643e-02, PNorm = 178.8048, GNorm = 0.1193, lr_0 = 2.9385e-04
Loss = 1.0410e-02, PNorm = 178.8180, GNorm = 0.2866, lr_0 = 2.9365e-04
Loss = 1.6668e-02, PNorm = 178.8330, GNorm = 0.3751, lr_0 = 2.9345e-04
Loss = 1.0680e-02, PNorm = 178.8443, GNorm = 0.1768, lr_0 = 2.9325e-04
Loss = 9.2778e-03, PNorm = 178.8561, GNorm = 0.2334, lr_0 = 2.9305e-04
Loss = 1.5891e-02, PNorm = 178.8679, GNorm = 0.2929, lr_0 = 2.9284e-04
Loss = 7.3557e-03, PNorm = 178.8789, GNorm = 0.0983, lr_0 = 2.9264e-04
Loss = 9.9079e-03, PNorm = 178.8931, GNorm = 0.3549, lr_0 = 2.9244e-04
Loss = 7.0928e-03, PNorm = 178.9027, GNorm = 0.1641, lr_0 = 2.9224e-04
Loss = 1.3063e-02, PNorm = 178.9113, GNorm = 0.2362, lr_0 = 2.9204e-04
Loss = 8.9398e-03, PNorm = 178.9200, GNorm = 0.2176, lr_0 = 2.9184e-04
Loss = 1.5199e-02, PNorm = 178.9315, GNorm = 0.2962, lr_0 = 2.9164e-04
Loss = 1.2028e-02, PNorm = 178.9425, GNorm = 0.3247, lr_0 = 2.9144e-04
Loss = 9.6423e-03, PNorm = 178.9568, GNorm = 0.1706, lr_0 = 2.9124e-04
Validation mae = 0.121303
Epoch 17
Loss = 6.9885e-03, PNorm = 178.9662, GNorm = 0.3421, lr_0 = 2.9104e-04
Loss = 7.8949e-03, PNorm = 178.9760, GNorm = 0.4109, lr_0 = 2.9084e-04
Loss = 7.3863e-03, PNorm = 178.9853, GNorm = 0.2527, lr_0 = 2.9065e-04
Loss = 1.1828e-02, PNorm = 178.9913, GNorm = 0.2503, lr_0 = 2.9045e-04
Loss = 7.7828e-03, PNorm = 178.9990, GNorm = 0.2771, lr_0 = 2.9025e-04
Loss = 6.8341e-03, PNorm = 179.0101, GNorm = 0.1817, lr_0 = 2.9005e-04
Loss = 7.3735e-03, PNorm = 179.0205, GNorm = 0.1657, lr_0 = 2.8985e-04
Loss = 1.1703e-02, PNorm = 179.0285, GNorm = 0.1949, lr_0 = 2.8965e-04
Loss = 9.0138e-03, PNorm = 179.0353, GNorm = 0.3040, lr_0 = 2.8945e-04
Loss = 1.2062e-02, PNorm = 179.0411, GNorm = 0.4500, lr_0 = 2.8925e-04
Loss = 9.6359e-03, PNorm = 179.0499, GNorm = 0.3655, lr_0 = 2.8906e-04
Loss = 8.4805e-03, PNorm = 179.0602, GNorm = 0.3847, lr_0 = 2.8886e-04
Loss = 1.1902e-02, PNorm = 179.0683, GNorm = 0.3269, lr_0 = 2.8866e-04
Loss = 1.0865e-02, PNorm = 179.0757, GNorm = 0.1470, lr_0 = 2.8846e-04
Loss = 8.1927e-03, PNorm = 179.0853, GNorm = 0.4181, lr_0 = 2.8826e-04
Loss = 1.0908e-02, PNorm = 179.0948, GNorm = 0.2681, lr_0 = 2.8807e-04
Loss = 8.2330e-03, PNorm = 179.1052, GNorm = 0.2573, lr_0 = 2.8787e-04
Loss = 7.6816e-03, PNorm = 179.1146, GNorm = 0.2114, lr_0 = 2.8767e-04
Loss = 6.8792e-03, PNorm = 179.1237, GNorm = 0.2187, lr_0 = 2.8748e-04
Loss = 1.0610e-02, PNorm = 179.1339, GNorm = 0.6949, lr_0 = 2.8728e-04
Loss = 1.4881e-02, PNorm = 179.1425, GNorm = 0.4042, lr_0 = 2.8708e-04
Loss = 9.4495e-03, PNorm = 179.1512, GNorm = 0.2804, lr_0 = 2.8689e-04
Loss = 7.4566e-03, PNorm = 179.1607, GNorm = 0.3268, lr_0 = 2.8669e-04
Loss = 8.7842e-03, PNorm = 179.1706, GNorm = 0.1359, lr_0 = 2.8649e-04
Loss = 9.1030e-03, PNorm = 179.1790, GNorm = 0.1732, lr_0 = 2.8630e-04
Loss = 8.7278e-03, PNorm = 179.1861, GNorm = 0.1410, lr_0 = 2.8610e-04
Loss = 6.3705e-03, PNorm = 179.1955, GNorm = 0.1820, lr_0 = 2.8590e-04
Loss = 8.7547e-03, PNorm = 179.2051, GNorm = 0.2295, lr_0 = 2.8571e-04
Loss = 8.3114e-03, PNorm = 179.2157, GNorm = 0.2047, lr_0 = 2.8551e-04
Loss = 1.2977e-02, PNorm = 179.2262, GNorm = 0.2383, lr_0 = 2.8532e-04
Loss = 7.6603e-03, PNorm = 179.2372, GNorm = 0.2329, lr_0 = 2.8512e-04
Loss = 7.8515e-03, PNorm = 179.2464, GNorm = 0.2812, lr_0 = 2.8493e-04
Loss = 9.3321e-03, PNorm = 179.2534, GNorm = 0.0942, lr_0 = 2.8473e-04
Loss = 1.3332e-02, PNorm = 179.2607, GNorm = 0.2408, lr_0 = 2.8454e-04
Loss = 7.0927e-03, PNorm = 179.2687, GNorm = 0.2976, lr_0 = 2.8434e-04
Loss = 1.0217e-02, PNorm = 179.2760, GNorm = 0.1953, lr_0 = 2.8415e-04
Loss = 9.4740e-03, PNorm = 179.2839, GNorm = 0.2289, lr_0 = 2.8395e-04
Loss = 8.8631e-03, PNorm = 179.2931, GNorm = 0.2548, lr_0 = 2.8376e-04
Loss = 9.3070e-03, PNorm = 179.3032, GNorm = 0.1203, lr_0 = 2.8356e-04
Loss = 7.3270e-03, PNorm = 179.3131, GNorm = 0.2420, lr_0 = 2.8337e-04
Loss = 1.0323e-02, PNorm = 179.3227, GNorm = 0.3837, lr_0 = 2.8317e-04
Loss = 7.2630e-03, PNorm = 179.3353, GNorm = 0.1754, lr_0 = 2.8298e-04
Loss = 1.4237e-02, PNorm = 179.3459, GNorm = 0.2519, lr_0 = 2.8279e-04
Loss = 9.5418e-03, PNorm = 179.3548, GNorm = 0.2037, lr_0 = 2.8259e-04
Loss = 7.3265e-03, PNorm = 179.3625, GNorm = 0.1418, lr_0 = 2.8240e-04
Loss = 7.3711e-03, PNorm = 179.3714, GNorm = 0.6196, lr_0 = 2.8221e-04
Loss = 7.0022e-03, PNorm = 179.3763, GNorm = 0.2972, lr_0 = 2.8201e-04
Loss = 9.6760e-03, PNorm = 179.3851, GNorm = 0.2256, lr_0 = 2.8182e-04
Loss = 1.2154e-02, PNorm = 179.3938, GNorm = 0.1098, lr_0 = 2.8163e-04
Loss = 5.9877e-03, PNorm = 179.4030, GNorm = 0.1556, lr_0 = 2.8143e-04
Loss = 8.1198e-03, PNorm = 179.4115, GNorm = 0.1997, lr_0 = 2.8124e-04
Loss = 8.6195e-03, PNorm = 179.4186, GNorm = 0.1457, lr_0 = 2.8105e-04
Loss = 7.5226e-03, PNorm = 179.4274, GNorm = 0.1425, lr_0 = 2.8085e-04
Loss = 1.1602e-02, PNorm = 179.4378, GNorm = 0.2179, lr_0 = 2.8066e-04
Loss = 6.6717e-03, PNorm = 179.4454, GNorm = 0.2868, lr_0 = 2.8047e-04
Loss = 8.3570e-03, PNorm = 179.4552, GNorm = 0.1780, lr_0 = 2.8028e-04
Loss = 9.3639e-03, PNorm = 179.4629, GNorm = 0.4882, lr_0 = 2.8009e-04
Loss = 9.4316e-03, PNorm = 179.4710, GNorm = 0.2698, lr_0 = 2.7989e-04
Loss = 8.3399e-03, PNorm = 179.4787, GNorm = 0.4604, lr_0 = 2.7970e-04
Loss = 8.0457e-03, PNorm = 179.4893, GNorm = 0.1581, lr_0 = 2.7951e-04
Loss = 9.7051e-03, PNorm = 179.5002, GNorm = 0.1793, lr_0 = 2.7932e-04
Loss = 9.7075e-03, PNorm = 179.5072, GNorm = 0.1751, lr_0 = 2.7913e-04
Loss = 1.0528e-02, PNorm = 179.5144, GNorm = 0.2528, lr_0 = 2.7894e-04
Loss = 7.5303e-03, PNorm = 179.5238, GNorm = 0.1180, lr_0 = 2.7875e-04
Loss = 1.0183e-02, PNorm = 179.5325, GNorm = 0.4028, lr_0 = 2.7855e-04
Loss = 1.2830e-02, PNorm = 179.5420, GNorm = 0.2989, lr_0 = 2.7836e-04
Loss = 1.1279e-02, PNorm = 179.5552, GNorm = 0.1939, lr_0 = 2.7817e-04
Loss = 9.4099e-03, PNorm = 179.5645, GNorm = 0.4016, lr_0 = 2.7798e-04
Loss = 9.9645e-03, PNorm = 179.5754, GNorm = 0.3637, lr_0 = 2.7779e-04
Loss = 6.6470e-03, PNorm = 179.5852, GNorm = 0.2316, lr_0 = 2.7760e-04
Loss = 8.5969e-03, PNorm = 179.5948, GNorm = 0.2189, lr_0 = 2.7741e-04
Loss = 6.9270e-03, PNorm = 179.6053, GNorm = 0.2805, lr_0 = 2.7722e-04
Loss = 8.0838e-03, PNorm = 179.6130, GNorm = 0.3346, lr_0 = 2.7703e-04
Loss = 7.6795e-03, PNorm = 179.6223, GNorm = 0.1720, lr_0 = 2.7684e-04
Loss = 5.7406e-03, PNorm = 179.6317, GNorm = 0.1690, lr_0 = 2.7665e-04
Loss = 6.9537e-03, PNorm = 179.6390, GNorm = 0.1415, lr_0 = 2.7646e-04
Loss = 1.0032e-02, PNorm = 179.6461, GNorm = 0.1740, lr_0 = 2.7627e-04
Loss = 7.3279e-03, PNorm = 179.6556, GNorm = 0.2815, lr_0 = 2.7608e-04
Loss = 7.7569e-03, PNorm = 179.6632, GNorm = 0.1891, lr_0 = 2.7590e-04
Loss = 1.1509e-02, PNorm = 179.6756, GNorm = 0.2249, lr_0 = 2.7571e-04
Loss = 7.3148e-03, PNorm = 179.6854, GNorm = 0.1812, lr_0 = 2.7552e-04
Loss = 8.2448e-03, PNorm = 179.6930, GNorm = 0.3282, lr_0 = 2.7533e-04
Loss = 7.2069e-03, PNorm = 179.7023, GNorm = 0.2335, lr_0 = 2.7514e-04
Loss = 8.6523e-03, PNorm = 179.7121, GNorm = 0.3917, lr_0 = 2.7495e-04
Loss = 8.4938e-03, PNorm = 179.7224, GNorm = 0.1171, lr_0 = 2.7476e-04
Loss = 7.5400e-03, PNorm = 179.7322, GNorm = 0.2660, lr_0 = 2.7457e-04
Loss = 8.2103e-03, PNorm = 179.7447, GNorm = 0.0913, lr_0 = 2.7439e-04
Loss = 6.5295e-03, PNorm = 179.7563, GNorm = 0.1482, lr_0 = 2.7420e-04
Loss = 9.6589e-03, PNorm = 179.7666, GNorm = 0.2161, lr_0 = 2.7401e-04
Loss = 6.3438e-03, PNorm = 179.7759, GNorm = 0.1509, lr_0 = 2.7382e-04
Loss = 1.3969e-02, PNorm = 179.7852, GNorm = 0.2910, lr_0 = 2.7364e-04
Loss = 6.8333e-03, PNorm = 179.7931, GNorm = 0.1379, lr_0 = 2.7345e-04
Loss = 7.4012e-03, PNorm = 179.8025, GNorm = 0.0847, lr_0 = 2.7326e-04
Loss = 9.1690e-03, PNorm = 179.8118, GNorm = 0.2084, lr_0 = 2.7307e-04
Loss = 1.0435e-02, PNorm = 179.8197, GNorm = 0.1386, lr_0 = 2.7289e-04
Loss = 8.1118e-03, PNorm = 179.8290, GNorm = 0.1946, lr_0 = 2.7270e-04
Loss = 7.3945e-03, PNorm = 179.8393, GNorm = 0.1812, lr_0 = 2.7251e-04
Loss = 8.9433e-03, PNorm = 179.8488, GNorm = 0.1438, lr_0 = 2.7233e-04
Loss = 1.2212e-02, PNorm = 179.8577, GNorm = 0.1733, lr_0 = 2.7214e-04
Loss = 8.1822e-03, PNorm = 179.8679, GNorm = 0.2599, lr_0 = 2.7195e-04
Loss = 1.3913e-02, PNorm = 179.8765, GNorm = 0.3102, lr_0 = 2.7177e-04
Loss = 7.2727e-03, PNorm = 179.8873, GNorm = 0.3175, lr_0 = 2.7158e-04
Loss = 5.8233e-03, PNorm = 179.8951, GNorm = 0.2409, lr_0 = 2.7139e-04
Loss = 1.0636e-02, PNorm = 179.9048, GNorm = 0.0837, lr_0 = 2.7121e-04
Loss = 1.3564e-02, PNorm = 179.9166, GNorm = 0.2286, lr_0 = 2.7102e-04
Loss = 1.7769e-02, PNorm = 179.9260, GNorm = 0.3580, lr_0 = 2.7084e-04
Loss = 1.4489e-02, PNorm = 179.9446, GNorm = 0.2414, lr_0 = 2.7065e-04
Loss = 9.9287e-03, PNorm = 179.9559, GNorm = 0.1895, lr_0 = 2.7047e-04
Loss = 9.2230e-03, PNorm = 179.9654, GNorm = 0.2699, lr_0 = 2.7028e-04
Loss = 2.0538e-02, PNorm = 179.9780, GNorm = 0.3128, lr_0 = 2.7010e-04
Loss = 1.0485e-02, PNorm = 179.9893, GNorm = 0.1504, lr_0 = 2.6991e-04
Loss = 8.0885e-03, PNorm = 179.9991, GNorm = 0.2888, lr_0 = 2.6973e-04
Loss = 1.3320e-02, PNorm = 180.0098, GNorm = 0.1233, lr_0 = 2.6954e-04
Loss = 1.2217e-02, PNorm = 180.0198, GNorm = 0.1741, lr_0 = 2.6936e-04
Loss = 1.1836e-02, PNorm = 180.0295, GNorm = 0.3689, lr_0 = 2.6917e-04
Loss = 7.0044e-03, PNorm = 180.0413, GNorm = 0.3264, lr_0 = 2.6899e-04
Loss = 9.6284e-03, PNorm = 180.0525, GNorm = 0.4463, lr_0 = 2.6880e-04
Loss = 8.3514e-03, PNorm = 180.0656, GNorm = 0.3366, lr_0 = 2.6862e-04
Loss = 1.2287e-02, PNorm = 180.0749, GNorm = 0.3970, lr_0 = 2.6844e-04
Loss = 1.4857e-02, PNorm = 180.0852, GNorm = 0.1441, lr_0 = 2.6825e-04
Validation mae = 0.121019
Epoch 18
Loss = 1.2149e-02, PNorm = 180.0925, GNorm = 0.2450, lr_0 = 2.6807e-04
Loss = 1.0656e-02, PNorm = 180.1004, GNorm = 0.5744, lr_0 = 2.6788e-04
Loss = 6.5852e-03, PNorm = 180.1064, GNorm = 0.1149, lr_0 = 2.6770e-04
Loss = 1.0003e-02, PNorm = 180.1124, GNorm = 0.3350, lr_0 = 2.6752e-04
Loss = 1.0825e-02, PNorm = 180.1194, GNorm = 0.1882, lr_0 = 2.6733e-04
Loss = 1.1050e-02, PNorm = 180.1252, GNorm = 0.2842, lr_0 = 2.6715e-04
Loss = 7.0288e-03, PNorm = 180.1318, GNorm = 0.2057, lr_0 = 2.6697e-04
Loss = 1.0825e-02, PNorm = 180.1399, GNorm = 0.2218, lr_0 = 2.6678e-04
Loss = 6.4115e-03, PNorm = 180.1476, GNorm = 0.1767, lr_0 = 2.6660e-04
Loss = 6.4935e-03, PNorm = 180.1550, GNorm = 0.1195, lr_0 = 2.6642e-04
Loss = 7.5458e-03, PNorm = 180.1610, GNorm = 0.1232, lr_0 = 2.6624e-04
Loss = 6.7638e-03, PNorm = 180.1674, GNorm = 0.1235, lr_0 = 2.6605e-04
Loss = 7.0177e-03, PNorm = 180.1729, GNorm = 0.1006, lr_0 = 2.6587e-04
Loss = 8.1349e-03, PNorm = 180.1816, GNorm = 0.0972, lr_0 = 2.6569e-04
Loss = 6.0246e-03, PNorm = 180.1872, GNorm = 0.1260, lr_0 = 2.6551e-04
Loss = 9.9950e-03, PNorm = 180.1918, GNorm = 0.2362, lr_0 = 2.6533e-04
Loss = 7.5678e-03, PNorm = 180.1984, GNorm = 0.2452, lr_0 = 2.6514e-04
Loss = 7.1330e-03, PNorm = 180.2047, GNorm = 0.1029, lr_0 = 2.6496e-04
Loss = 9.8106e-03, PNorm = 180.2139, GNorm = 0.1947, lr_0 = 2.6478e-04
Loss = 7.1022e-03, PNorm = 180.2195, GNorm = 0.1282, lr_0 = 2.6460e-04
Loss = 7.9317e-03, PNorm = 180.2268, GNorm = 0.3109, lr_0 = 2.6442e-04
Loss = 8.5723e-03, PNorm = 180.2337, GNorm = 0.1580, lr_0 = 2.6424e-04
Loss = 6.0747e-03, PNorm = 180.2415, GNorm = 0.0960, lr_0 = 2.6406e-04
Loss = 6.6038e-03, PNorm = 180.2511, GNorm = 0.1406, lr_0 = 2.6388e-04
Loss = 6.3493e-03, PNorm = 180.2595, GNorm = 0.1395, lr_0 = 2.6369e-04
Loss = 5.6587e-03, PNorm = 180.2658, GNorm = 0.2149, lr_0 = 2.6351e-04
Loss = 8.2891e-03, PNorm = 180.2730, GNorm = 0.1486, lr_0 = 2.6333e-04
Loss = 7.9977e-03, PNorm = 180.2794, GNorm = 0.1338, lr_0 = 2.6315e-04
Loss = 7.8080e-03, PNorm = 180.2858, GNorm = 0.2330, lr_0 = 2.6297e-04
Loss = 7.6074e-03, PNorm = 180.2921, GNorm = 0.2569, lr_0 = 2.6279e-04
Loss = 8.7555e-03, PNorm = 180.2965, GNorm = 0.4306, lr_0 = 2.6261e-04
Loss = 7.6183e-03, PNorm = 180.3009, GNorm = 0.1930, lr_0 = 2.6243e-04
Loss = 6.7925e-03, PNorm = 180.3091, GNorm = 0.2875, lr_0 = 2.6225e-04
Loss = 6.5492e-03, PNorm = 180.3185, GNorm = 0.1626, lr_0 = 2.6207e-04
Loss = 1.0278e-02, PNorm = 180.3280, GNorm = 0.2169, lr_0 = 2.6189e-04
Loss = 7.6700e-03, PNorm = 180.3371, GNorm = 0.1206, lr_0 = 2.6171e-04
Loss = 7.0051e-03, PNorm = 180.3452, GNorm = 0.2386, lr_0 = 2.6153e-04
Loss = 6.0601e-03, PNorm = 180.3518, GNorm = 0.3260, lr_0 = 2.6136e-04
Loss = 5.5404e-03, PNorm = 180.3566, GNorm = 0.1284, lr_0 = 2.6118e-04
Loss = 5.8364e-03, PNorm = 180.3629, GNorm = 0.1893, lr_0 = 2.6100e-04
Loss = 7.7934e-03, PNorm = 180.3700, GNorm = 0.0844, lr_0 = 2.6082e-04
Loss = 6.4538e-03, PNorm = 180.3764, GNorm = 0.2293, lr_0 = 2.6064e-04
Loss = 6.3425e-03, PNorm = 180.3839, GNorm = 0.0814, lr_0 = 2.6046e-04
Loss = 5.7116e-03, PNorm = 180.3935, GNorm = 0.1820, lr_0 = 2.6028e-04
Loss = 7.4640e-03, PNorm = 180.4042, GNorm = 0.1037, lr_0 = 2.6011e-04
Loss = 9.9611e-03, PNorm = 180.4120, GNorm = 0.1106, lr_0 = 2.5993e-04
Loss = 1.5210e-02, PNorm = 180.4221, GNorm = 0.5548, lr_0 = 2.5975e-04
Loss = 8.1962e-03, PNorm = 180.4313, GNorm = 0.2387, lr_0 = 2.5957e-04
Loss = 5.8920e-03, PNorm = 180.4372, GNorm = 0.2264, lr_0 = 2.5939e-04
Loss = 1.1213e-02, PNorm = 180.4414, GNorm = 0.1254, lr_0 = 2.5922e-04
Loss = 7.3894e-03, PNorm = 180.4451, GNorm = 0.3958, lr_0 = 2.5904e-04
Loss = 6.7710e-03, PNorm = 180.4526, GNorm = 0.1468, lr_0 = 2.5886e-04
Loss = 8.0270e-03, PNorm = 180.4625, GNorm = 0.7370, lr_0 = 2.5868e-04
Loss = 8.0912e-03, PNorm = 180.4726, GNorm = 0.1181, lr_0 = 2.5851e-04
Loss = 1.0088e-02, PNorm = 180.4805, GNorm = 0.3117, lr_0 = 2.5833e-04
Loss = 5.6634e-03, PNorm = 180.4887, GNorm = 0.3469, lr_0 = 2.5815e-04
Loss = 5.5421e-03, PNorm = 180.4964, GNorm = 0.2381, lr_0 = 2.5797e-04
Loss = 7.7462e-03, PNorm = 180.5048, GNorm = 0.0919, lr_0 = 2.5780e-04
Loss = 8.0984e-03, PNorm = 180.5134, GNorm = 0.3737, lr_0 = 2.5762e-04
Loss = 7.5721e-03, PNorm = 180.5217, GNorm = 0.1482, lr_0 = 2.5745e-04
Loss = 6.5969e-03, PNorm = 180.5303, GNorm = 0.5041, lr_0 = 2.5727e-04
Loss = 7.8872e-03, PNorm = 180.5371, GNorm = 0.1686, lr_0 = 2.5709e-04
Loss = 1.0521e-02, PNorm = 180.5444, GNorm = 0.1153, lr_0 = 2.5692e-04
Loss = 8.2759e-03, PNorm = 180.5536, GNorm = 0.1511, lr_0 = 2.5674e-04
Loss = 1.3361e-02, PNorm = 180.5615, GNorm = 0.2634, lr_0 = 2.5656e-04
Loss = 1.1355e-02, PNorm = 180.5708, GNorm = 0.1660, lr_0 = 2.5639e-04
Loss = 1.7700e-02, PNorm = 180.5796, GNorm = 0.0996, lr_0 = 2.5621e-04
Loss = 1.0980e-02, PNorm = 180.5880, GNorm = 0.1382, lr_0 = 2.5604e-04
Loss = 6.9724e-03, PNorm = 180.5953, GNorm = 0.2195, lr_0 = 2.5586e-04
Loss = 6.8337e-03, PNorm = 180.6025, GNorm = 0.2696, lr_0 = 2.5569e-04
Loss = 5.8247e-03, PNorm = 180.6105, GNorm = 0.1456, lr_0 = 2.5551e-04
Loss = 6.4316e-03, PNorm = 180.6179, GNorm = 0.1410, lr_0 = 2.5534e-04
Loss = 2.4216e-02, PNorm = 180.6259, GNorm = 0.5390, lr_0 = 2.5516e-04
Loss = 8.4515e-03, PNorm = 180.6329, GNorm = 0.2213, lr_0 = 2.5499e-04
Loss = 1.1852e-02, PNorm = 180.6376, GNorm = 0.2096, lr_0 = 2.5481e-04
Loss = 7.2808e-03, PNorm = 180.6463, GNorm = 0.1101, lr_0 = 2.5464e-04
Loss = 7.5146e-03, PNorm = 180.6564, GNorm = 0.0927, lr_0 = 2.5446e-04
Loss = 6.0085e-03, PNorm = 180.6652, GNorm = 0.2085, lr_0 = 2.5429e-04
Loss = 1.2704e-02, PNorm = 180.6710, GNorm = 0.4608, lr_0 = 2.5411e-04
Loss = 1.2230e-02, PNorm = 180.6829, GNorm = 0.2509, lr_0 = 2.5394e-04
Loss = 9.1568e-03, PNorm = 180.6913, GNorm = 0.3268, lr_0 = 2.5377e-04
Loss = 7.2762e-03, PNorm = 180.6969, GNorm = 0.1769, lr_0 = 2.5359e-04
Loss = 8.4295e-03, PNorm = 180.7062, GNorm = 0.2433, lr_0 = 2.5342e-04
Loss = 6.6855e-03, PNorm = 180.7146, GNorm = 0.1845, lr_0 = 2.5325e-04
Loss = 5.9954e-03, PNorm = 180.7227, GNorm = 0.1945, lr_0 = 2.5307e-04
Loss = 1.4098e-02, PNorm = 180.7299, GNorm = 0.2411, lr_0 = 2.5290e-04
Loss = 5.4377e-03, PNorm = 180.7386, GNorm = 0.0731, lr_0 = 2.5273e-04
Loss = 6.9600e-03, PNorm = 180.7464, GNorm = 0.2899, lr_0 = 2.5255e-04
Loss = 7.3990e-03, PNorm = 180.7527, GNorm = 0.1990, lr_0 = 2.5238e-04
Loss = 8.1042e-03, PNorm = 180.7603, GNorm = 0.1396, lr_0 = 2.5221e-04
Loss = 9.4562e-03, PNorm = 180.7689, GNorm = 0.4308, lr_0 = 2.5203e-04
Loss = 5.4812e-03, PNorm = 180.7751, GNorm = 0.1902, lr_0 = 2.5186e-04
Loss = 9.5622e-03, PNorm = 180.7829, GNorm = 0.3426, lr_0 = 2.5169e-04
Loss = 8.2125e-03, PNorm = 180.7910, GNorm = 0.1754, lr_0 = 2.5152e-04
Loss = 1.2563e-02, PNorm = 180.7988, GNorm = 0.5785, lr_0 = 2.5134e-04
Loss = 5.3291e-03, PNorm = 180.8072, GNorm = 0.1460, lr_0 = 2.5117e-04
Loss = 6.3006e-03, PNorm = 180.8176, GNorm = 0.1352, lr_0 = 2.5100e-04
Loss = 8.0177e-03, PNorm = 180.8279, GNorm = 0.4508, lr_0 = 2.5083e-04
Loss = 6.6372e-03, PNorm = 180.8357, GNorm = 0.2531, lr_0 = 2.5066e-04
Loss = 5.4193e-03, PNorm = 180.8443, GNorm = 0.1153, lr_0 = 2.5048e-04
Loss = 5.2039e-03, PNorm = 180.8523, GNorm = 0.2487, lr_0 = 2.5031e-04
Loss = 1.2867e-02, PNorm = 180.8580, GNorm = 0.1781, lr_0 = 2.5014e-04
Loss = 7.0862e-03, PNorm = 180.8671, GNorm = 0.2012, lr_0 = 2.4997e-04
Loss = 6.9540e-03, PNorm = 180.8763, GNorm = 0.2991, lr_0 = 2.4980e-04
Loss = 8.3557e-03, PNorm = 180.8860, GNorm = 0.3529, lr_0 = 2.4963e-04
Loss = 1.0605e-02, PNorm = 180.8941, GNorm = 0.9452, lr_0 = 2.4946e-04
Loss = 7.6334e-03, PNorm = 180.9025, GNorm = 0.1922, lr_0 = 2.4929e-04
Loss = 9.4981e-03, PNorm = 180.9087, GNorm = 0.1108, lr_0 = 2.4911e-04
Loss = 5.1461e-03, PNorm = 180.9173, GNorm = 0.1247, lr_0 = 2.4894e-04
Loss = 6.3273e-03, PNorm = 180.9245, GNorm = 0.1442, lr_0 = 2.4877e-04
Loss = 7.1109e-03, PNorm = 180.9332, GNorm = 0.2343, lr_0 = 2.4860e-04
Loss = 7.3867e-03, PNorm = 180.9431, GNorm = 0.2845, lr_0 = 2.4843e-04
Loss = 5.4958e-03, PNorm = 180.9526, GNorm = 0.1703, lr_0 = 2.4826e-04
Loss = 4.4998e-03, PNorm = 180.9621, GNorm = 0.1145, lr_0 = 2.4809e-04
Loss = 6.9903e-03, PNorm = 180.9701, GNorm = 0.1892, lr_0 = 2.4792e-04
Loss = 8.3925e-03, PNorm = 180.9804, GNorm = 0.0852, lr_0 = 2.4775e-04
Loss = 5.7087e-03, PNorm = 180.9891, GNorm = 0.2754, lr_0 = 2.4758e-04
Loss = 8.5859e-03, PNorm = 180.9960, GNorm = 0.1958, lr_0 = 2.4741e-04
Loss = 7.6371e-03, PNorm = 181.0026, GNorm = 0.1393, lr_0 = 2.4724e-04
Loss = 1.0765e-02, PNorm = 181.0104, GNorm = 0.3016, lr_0 = 2.4707e-04
Validation mae = 0.121386
Epoch 19
Loss = 6.2289e-03, PNorm = 181.0182, GNorm = 0.0955, lr_0 = 2.4690e-04
Loss = 7.9845e-03, PNorm = 181.0241, GNorm = 0.1090, lr_0 = 2.4674e-04
Loss = 5.3103e-03, PNorm = 181.0278, GNorm = 0.2579, lr_0 = 2.4657e-04
Loss = 1.0096e-02, PNorm = 181.0330, GNorm = 0.1936, lr_0 = 2.4640e-04
Loss = 5.7951e-03, PNorm = 181.0373, GNorm = 0.1265, lr_0 = 2.4623e-04
Loss = 6.3080e-03, PNorm = 181.0405, GNorm = 0.1375, lr_0 = 2.4606e-04
Loss = 4.8947e-03, PNorm = 181.0463, GNorm = 0.2942, lr_0 = 2.4589e-04
Loss = 7.8784e-03, PNorm = 181.0505, GNorm = 0.3298, lr_0 = 2.4572e-04
Loss = 5.4279e-03, PNorm = 181.0555, GNorm = 0.2585, lr_0 = 2.4556e-04
Loss = 5.7810e-03, PNorm = 181.0629, GNorm = 0.1128, lr_0 = 2.4539e-04
Loss = 4.5638e-03, PNorm = 181.0694, GNorm = 0.1346, lr_0 = 2.4522e-04
Loss = 5.9477e-03, PNorm = 181.0762, GNorm = 0.1857, lr_0 = 2.4505e-04
Loss = 7.5131e-03, PNorm = 181.0813, GNorm = 0.2295, lr_0 = 2.4488e-04
Loss = 5.9307e-03, PNorm = 181.0862, GNorm = 0.2427, lr_0 = 2.4472e-04
Loss = 6.0868e-03, PNorm = 181.0933, GNorm = 0.3865, lr_0 = 2.4455e-04
Loss = 5.2156e-03, PNorm = 181.0980, GNorm = 0.1198, lr_0 = 2.4438e-04
Loss = 4.6476e-03, PNorm = 181.1020, GNorm = 0.1111, lr_0 = 2.4421e-04
Loss = 6.3831e-03, PNorm = 181.1067, GNorm = 0.2052, lr_0 = 2.4405e-04
Loss = 6.1211e-03, PNorm = 181.1131, GNorm = 0.1520, lr_0 = 2.4388e-04
Loss = 6.5479e-03, PNorm = 181.1221, GNorm = 0.0945, lr_0 = 2.4371e-04
Loss = 4.8653e-03, PNorm = 181.1275, GNorm = 0.1698, lr_0 = 2.4354e-04
Loss = 5.0680e-03, PNorm = 181.1312, GNorm = 0.1259, lr_0 = 2.4338e-04
Loss = 6.0678e-03, PNorm = 181.1346, GNorm = 0.0769, lr_0 = 2.4321e-04
Loss = 9.8217e-03, PNorm = 181.1401, GNorm = 0.1502, lr_0 = 2.4304e-04
Loss = 5.5012e-03, PNorm = 181.1445, GNorm = 0.2195, lr_0 = 2.4288e-04
Loss = 6.2421e-03, PNorm = 181.1485, GNorm = 0.0745, lr_0 = 2.4271e-04
Loss = 6.7631e-03, PNorm = 181.1548, GNorm = 0.2235, lr_0 = 2.4254e-04
Loss = 6.7464e-03, PNorm = 181.1622, GNorm = 0.1661, lr_0 = 2.4238e-04
Loss = 1.0163e-02, PNorm = 181.1676, GNorm = 0.2741, lr_0 = 2.4221e-04
Loss = 4.9593e-03, PNorm = 181.1755, GNorm = 0.2535, lr_0 = 2.4205e-04
Loss = 4.7498e-03, PNorm = 181.1821, GNorm = 0.0971, lr_0 = 2.4188e-04
Loss = 1.2197e-02, PNorm = 181.1863, GNorm = 0.1145, lr_0 = 2.4171e-04
Loss = 5.3406e-03, PNorm = 181.1963, GNorm = 0.2116, lr_0 = 2.4155e-04
Loss = 6.9367e-03, PNorm = 181.2017, GNorm = 0.1370, lr_0 = 2.4138e-04
Loss = 5.4042e-03, PNorm = 181.2074, GNorm = 0.1725, lr_0 = 2.4122e-04
Loss = 5.4414e-03, PNorm = 181.2135, GNorm = 0.1193, lr_0 = 2.4105e-04
Loss = 5.6393e-03, PNorm = 181.2212, GNorm = 0.1974, lr_0 = 2.4089e-04
Loss = 8.4390e-03, PNorm = 181.2268, GNorm = 0.2405, lr_0 = 2.4072e-04
Loss = 7.0974e-03, PNorm = 181.2322, GNorm = 0.2409, lr_0 = 2.4056e-04
Loss = 6.3027e-03, PNorm = 181.2364, GNorm = 0.1791, lr_0 = 2.4039e-04
Loss = 9.8540e-03, PNorm = 181.2406, GNorm = 0.0852, lr_0 = 2.4023e-04
Loss = 5.1788e-03, PNorm = 181.2468, GNorm = 0.1032, lr_0 = 2.4006e-04
Loss = 6.7745e-03, PNorm = 181.2519, GNorm = 0.1208, lr_0 = 2.3990e-04
Loss = 9.1982e-03, PNorm = 181.2584, GNorm = 0.1245, lr_0 = 2.3974e-04
Loss = 9.6769e-03, PNorm = 181.2644, GNorm = 0.1227, lr_0 = 2.3957e-04
Loss = 8.6722e-03, PNorm = 181.2701, GNorm = 0.2953, lr_0 = 2.3941e-04
Loss = 1.0295e-02, PNorm = 181.2788, GNorm = 0.2859, lr_0 = 2.3924e-04
Loss = 9.6159e-03, PNorm = 181.2864, GNorm = 0.3008, lr_0 = 2.3908e-04
Loss = 7.7734e-03, PNorm = 181.2929, GNorm = 0.1496, lr_0 = 2.3892e-04
Loss = 1.0081e-02, PNorm = 181.2988, GNorm = 0.1012, lr_0 = 2.3875e-04
Loss = 8.8595e-03, PNorm = 181.3048, GNorm = 0.1455, lr_0 = 2.3859e-04
Loss = 5.4415e-03, PNorm = 181.3093, GNorm = 0.1510, lr_0 = 2.3842e-04
Loss = 6.7155e-03, PNorm = 181.3163, GNorm = 0.1935, lr_0 = 2.3826e-04
Loss = 1.0040e-02, PNorm = 181.3225, GNorm = 0.1720, lr_0 = 2.3810e-04
Loss = 9.5415e-03, PNorm = 181.3273, GNorm = 0.3302, lr_0 = 2.3794e-04
Loss = 8.2377e-03, PNorm = 181.3339, GNorm = 0.1142, lr_0 = 2.3777e-04
Loss = 7.9333e-03, PNorm = 181.3419, GNorm = 0.0832, lr_0 = 2.3761e-04
Loss = 5.6882e-03, PNorm = 181.3472, GNorm = 0.2966, lr_0 = 2.3745e-04
Loss = 8.9512e-03, PNorm = 181.3545, GNorm = 0.2312, lr_0 = 2.3728e-04
Loss = 9.7457e-03, PNorm = 181.3602, GNorm = 0.2393, lr_0 = 2.3712e-04
Loss = 1.4624e-02, PNorm = 181.3675, GNorm = 0.0851, lr_0 = 2.3696e-04
Loss = 6.1426e-03, PNorm = 181.3742, GNorm = 0.1325, lr_0 = 2.3680e-04
Loss = 4.8668e-03, PNorm = 181.3815, GNorm = 0.1715, lr_0 = 2.3663e-04
Loss = 1.0568e-02, PNorm = 181.3875, GNorm = 0.2728, lr_0 = 2.3647e-04
Loss = 6.1392e-03, PNorm = 181.3948, GNorm = 0.1640, lr_0 = 2.3631e-04
Loss = 5.5928e-03, PNorm = 181.4015, GNorm = 0.1467, lr_0 = 2.3615e-04
Loss = 8.5966e-03, PNorm = 181.4102, GNorm = 0.1564, lr_0 = 2.3599e-04
Loss = 1.2642e-02, PNorm = 181.4167, GNorm = 0.1135, lr_0 = 2.3582e-04
Loss = 4.5704e-03, PNorm = 181.4246, GNorm = 0.1020, lr_0 = 2.3566e-04
Loss = 4.7733e-03, PNorm = 181.4292, GNorm = 0.2441, lr_0 = 2.3550e-04
Loss = 6.2628e-03, PNorm = 181.4333, GNorm = 0.1217, lr_0 = 2.3534e-04
Loss = 5.9158e-03, PNorm = 181.4381, GNorm = 0.1338, lr_0 = 2.3518e-04
Loss = 4.8010e-03, PNorm = 181.4442, GNorm = 0.1527, lr_0 = 2.3502e-04
Loss = 5.7398e-03, PNorm = 181.4494, GNorm = 0.1243, lr_0 = 2.3486e-04
Loss = 7.9700e-03, PNorm = 181.4563, GNorm = 0.1183, lr_0 = 2.3470e-04
Loss = 7.7022e-03, PNorm = 181.4611, GNorm = 0.1407, lr_0 = 2.3454e-04
Loss = 7.7522e-03, PNorm = 181.4647, GNorm = 0.1398, lr_0 = 2.3437e-04
Loss = 5.4544e-03, PNorm = 181.4681, GNorm = 0.0895, lr_0 = 2.3421e-04
Loss = 4.4576e-03, PNorm = 181.4721, GNorm = 0.2014, lr_0 = 2.3405e-04
Loss = 5.1729e-03, PNorm = 181.4799, GNorm = 0.1372, lr_0 = 2.3389e-04
Loss = 6.7389e-03, PNorm = 181.4856, GNorm = 0.1367, lr_0 = 2.3373e-04
Loss = 5.1329e-03, PNorm = 181.4923, GNorm = 0.1688, lr_0 = 2.3357e-04
Loss = 7.5901e-03, PNorm = 181.5006, GNorm = 0.1630, lr_0 = 2.3341e-04
Loss = 6.7027e-03, PNorm = 181.5078, GNorm = 0.0819, lr_0 = 2.3325e-04
Loss = 8.8381e-03, PNorm = 181.5139, GNorm = 0.1622, lr_0 = 2.3309e-04
Loss = 7.0702e-03, PNorm = 181.5202, GNorm = 0.2938, lr_0 = 2.3293e-04
Loss = 1.3108e-02, PNorm = 181.5261, GNorm = 0.6325, lr_0 = 2.3277e-04
Loss = 4.4127e-03, PNorm = 181.5326, GNorm = 0.2128, lr_0 = 2.3261e-04
Loss = 4.5308e-03, PNorm = 181.5384, GNorm = 0.2830, lr_0 = 2.3246e-04
Loss = 6.1678e-03, PNorm = 181.5433, GNorm = 0.0738, lr_0 = 2.3230e-04
Loss = 7.7853e-03, PNorm = 181.5495, GNorm = 0.1618, lr_0 = 2.3214e-04
Loss = 6.9346e-03, PNorm = 181.5569, GNorm = 0.3843, lr_0 = 2.3198e-04
Loss = 8.1652e-03, PNorm = 181.5621, GNorm = 0.1404, lr_0 = 2.3182e-04
Loss = 4.8460e-03, PNorm = 181.5693, GNorm = 0.1504, lr_0 = 2.3166e-04
Loss = 5.0831e-03, PNorm = 181.5752, GNorm = 0.1470, lr_0 = 2.3150e-04
Loss = 8.5895e-03, PNorm = 181.5809, GNorm = 0.1975, lr_0 = 2.3134e-04
Loss = 4.7288e-03, PNorm = 181.5874, GNorm = 0.2596, lr_0 = 2.3118e-04
Loss = 6.6986e-03, PNorm = 181.5948, GNorm = 0.2174, lr_0 = 2.3103e-04
Loss = 4.3243e-03, PNorm = 181.6019, GNorm = 0.1679, lr_0 = 2.3087e-04
Loss = 7.0948e-03, PNorm = 181.6059, GNorm = 0.2209, lr_0 = 2.3071e-04
Loss = 7.9674e-03, PNorm = 181.6135, GNorm = 0.2462, lr_0 = 2.3055e-04
Loss = 6.0283e-03, PNorm = 181.6224, GNorm = 0.1204, lr_0 = 2.3039e-04
Loss = 7.0518e-03, PNorm = 181.6309, GNorm = 0.2275, lr_0 = 2.3024e-04
Loss = 9.2105e-03, PNorm = 181.6370, GNorm = 1.4053, lr_0 = 2.3008e-04
Loss = 6.0790e-03, PNorm = 181.6424, GNorm = 0.2565, lr_0 = 2.2992e-04
Loss = 4.0126e-03, PNorm = 181.6498, GNorm = 0.1484, lr_0 = 2.2976e-04
Loss = 4.6876e-03, PNorm = 181.6552, GNorm = 0.1147, lr_0 = 2.2961e-04
Loss = 7.0127e-03, PNorm = 181.6636, GNorm = 0.2836, lr_0 = 2.2945e-04
Loss = 3.9198e-03, PNorm = 181.6707, GNorm = 0.1886, lr_0 = 2.2929e-04
Loss = 6.3083e-03, PNorm = 181.6755, GNorm = 0.3699, lr_0 = 2.2913e-04
Loss = 5.3123e-03, PNorm = 181.6835, GNorm = 0.0787, lr_0 = 2.2898e-04
Loss = 8.7370e-03, PNorm = 181.6906, GNorm = 0.3400, lr_0 = 2.2882e-04
Loss = 9.9877e-03, PNorm = 181.6965, GNorm = 0.2376, lr_0 = 2.2866e-04
Loss = 7.2581e-03, PNorm = 181.7031, GNorm = 0.1858, lr_0 = 2.2851e-04
Loss = 4.4887e-03, PNorm = 181.7111, GNorm = 0.1252, lr_0 = 2.2835e-04
Loss = 2.4641e-02, PNorm = 181.7188, GNorm = 0.5639, lr_0 = 2.2819e-04
Loss = 5.1006e-03, PNorm = 181.7254, GNorm = 0.2308, lr_0 = 2.2804e-04
Loss = 5.3280e-03, PNorm = 181.7339, GNorm = 0.1758, lr_0 = 2.2788e-04
Loss = 1.0445e-02, PNorm = 181.7389, GNorm = 0.2192, lr_0 = 2.2773e-04
Loss = 1.0452e-02, PNorm = 181.7453, GNorm = 0.1450, lr_0 = 2.2757e-04
Validation mae = 0.121193
Epoch 20
Loss = 7.0760e-03, PNorm = 181.7506, GNorm = 0.1148, lr_0 = 2.2741e-04
Loss = 3.8185e-03, PNorm = 181.7555, GNorm = 0.1702, lr_0 = 2.2726e-04
Loss = 6.2904e-03, PNorm = 181.7599, GNorm = 0.1055, lr_0 = 2.2710e-04
Loss = 1.0259e-02, PNorm = 181.7663, GNorm = 0.1535, lr_0 = 2.2695e-04
Loss = 4.6756e-03, PNorm = 181.7731, GNorm = 0.1480, lr_0 = 2.2679e-04
Loss = 8.9301e-03, PNorm = 181.7780, GNorm = 0.0794, lr_0 = 2.2664e-04
Loss = 4.1058e-03, PNorm = 181.7805, GNorm = 0.1675, lr_0 = 2.2648e-04
Loss = 5.3493e-03, PNorm = 181.7839, GNorm = 0.3406, lr_0 = 2.2632e-04
Loss = 1.0867e-02, PNorm = 181.7881, GNorm = 0.1678, lr_0 = 2.2617e-04
Loss = 1.0303e-02, PNorm = 181.7926, GNorm = 0.2624, lr_0 = 2.2601e-04
Loss = 3.9800e-03, PNorm = 181.7992, GNorm = 0.1390, lr_0 = 2.2586e-04
Loss = 5.3585e-03, PNorm = 181.8064, GNorm = 0.1168, lr_0 = 2.2571e-04
Loss = 4.1195e-03, PNorm = 181.8135, GNorm = 0.1398, lr_0 = 2.2555e-04
Loss = 4.2309e-03, PNorm = 181.8192, GNorm = 0.1067, lr_0 = 2.2540e-04
Loss = 4.6488e-03, PNorm = 181.8249, GNorm = 0.1026, lr_0 = 2.2524e-04
Loss = 5.0085e-03, PNorm = 181.8307, GNorm = 0.2505, lr_0 = 2.2509e-04
Loss = 6.4814e-03, PNorm = 181.8344, GNorm = 0.1558, lr_0 = 2.2493e-04
Loss = 7.1234e-03, PNorm = 181.8383, GNorm = 0.0861, lr_0 = 2.2478e-04
Loss = 8.8880e-03, PNorm = 181.8405, GNorm = 0.1756, lr_0 = 2.2463e-04
Loss = 5.6729e-03, PNorm = 181.8429, GNorm = 0.3150, lr_0 = 2.2447e-04
Loss = 7.4583e-03, PNorm = 181.8490, GNorm = 0.1569, lr_0 = 2.2432e-04
Loss = 8.4144e-03, PNorm = 181.8541, GNorm = 0.2228, lr_0 = 2.2416e-04
Loss = 5.7064e-03, PNorm = 181.8615, GNorm = 0.2801, lr_0 = 2.2401e-04
Loss = 7.8884e-03, PNorm = 181.8681, GNorm = 0.2658, lr_0 = 2.2386e-04
Loss = 4.1556e-03, PNorm = 181.8731, GNorm = 0.3464, lr_0 = 2.2370e-04
Loss = 9.1791e-03, PNorm = 181.8770, GNorm = 0.1376, lr_0 = 2.2355e-04
Loss = 4.1909e-03, PNorm = 181.8837, GNorm = 0.2072, lr_0 = 2.2340e-04
Loss = 5.7423e-03, PNorm = 181.8892, GNorm = 0.2589, lr_0 = 2.2324e-04
Loss = 5.2789e-03, PNorm = 181.8938, GNorm = 0.3717, lr_0 = 2.2309e-04
Loss = 1.7178e-02, PNorm = 181.9007, GNorm = 0.2856, lr_0 = 2.2294e-04
Loss = 5.2656e-03, PNorm = 181.9062, GNorm = 0.1368, lr_0 = 2.2279e-04
Loss = 5.7593e-03, PNorm = 181.9096, GNorm = 0.1218, lr_0 = 2.2263e-04
Loss = 3.8763e-03, PNorm = 181.9145, GNorm = 0.2624, lr_0 = 2.2248e-04
Loss = 4.4016e-03, PNorm = 181.9164, GNorm = 0.2921, lr_0 = 2.2233e-04
Loss = 6.7585e-03, PNorm = 181.9197, GNorm = 0.1709, lr_0 = 2.2218e-04
Loss = 4.2755e-03, PNorm = 181.9258, GNorm = 0.1260, lr_0 = 2.2202e-04
Loss = 9.0590e-03, PNorm = 181.9306, GNorm = 0.1275, lr_0 = 2.2187e-04
Loss = 3.9059e-03, PNorm = 181.9354, GNorm = 0.1429, lr_0 = 2.2172e-04
Loss = 5.5013e-03, PNorm = 181.9383, GNorm = 0.2091, lr_0 = 2.2157e-04
Loss = 4.7426e-03, PNorm = 181.9404, GNorm = 0.1344, lr_0 = 2.2142e-04
Loss = 3.8699e-03, PNorm = 181.9438, GNorm = 0.0739, lr_0 = 2.2126e-04
Loss = 1.0247e-02, PNorm = 181.9482, GNorm = 0.1022, lr_0 = 2.2111e-04
Loss = 4.3288e-03, PNorm = 181.9527, GNorm = 0.3156, lr_0 = 2.2096e-04
Loss = 5.1792e-03, PNorm = 181.9580, GNorm = 0.1003, lr_0 = 2.2081e-04
Loss = 1.3466e-02, PNorm = 181.9638, GNorm = 0.1324, lr_0 = 2.2066e-04
Loss = 5.4012e-03, PNorm = 181.9684, GNorm = 0.1754, lr_0 = 2.2051e-04
Loss = 5.6666e-03, PNorm = 181.9722, GNorm = 0.2386, lr_0 = 2.2036e-04
Loss = 5.2986e-03, PNorm = 181.9762, GNorm = 0.1720, lr_0 = 2.2021e-04
Loss = 4.2975e-03, PNorm = 181.9812, GNorm = 0.2201, lr_0 = 2.2005e-04
Loss = 5.5730e-03, PNorm = 181.9866, GNorm = 0.2325, lr_0 = 2.1990e-04
Loss = 7.1211e-03, PNorm = 181.9911, GNorm = 0.2746, lr_0 = 2.1975e-04
Loss = 9.2113e-03, PNorm = 181.9959, GNorm = 0.0906, lr_0 = 2.1960e-04
Loss = 4.9948e-03, PNorm = 182.0005, GNorm = 0.3884, lr_0 = 2.1945e-04
Loss = 7.6620e-03, PNorm = 182.0077, GNorm = 0.3004, lr_0 = 2.1930e-04
Loss = 4.9489e-03, PNorm = 182.0129, GNorm = 0.1631, lr_0 = 2.1915e-04
Loss = 8.2344e-03, PNorm = 182.0158, GNorm = 0.1729, lr_0 = 2.1900e-04
Loss = 4.7843e-03, PNorm = 182.0217, GNorm = 0.1090, lr_0 = 2.1885e-04
Loss = 4.8747e-03, PNorm = 182.0299, GNorm = 0.2369, lr_0 = 2.1870e-04
Loss = 3.7587e-03, PNorm = 182.0382, GNorm = 0.1038, lr_0 = 2.1855e-04
Loss = 6.2235e-03, PNorm = 182.0461, GNorm = 0.1478, lr_0 = 2.1840e-04
Loss = 4.8890e-03, PNorm = 182.0524, GNorm = 0.1912, lr_0 = 2.1825e-04
Loss = 8.9844e-03, PNorm = 182.0577, GNorm = 0.2179, lr_0 = 2.1810e-04
Loss = 4.6195e-03, PNorm = 182.0609, GNorm = 0.2163, lr_0 = 2.1795e-04
Loss = 3.5288e-03, PNorm = 182.0647, GNorm = 0.1184, lr_0 = 2.1780e-04
Loss = 5.8327e-03, PNorm = 182.0692, GNorm = 0.1474, lr_0 = 2.1765e-04
Loss = 4.0501e-03, PNorm = 182.0726, GNorm = 0.1182, lr_0 = 2.1751e-04
Loss = 4.4516e-03, PNorm = 182.0787, GNorm = 0.1282, lr_0 = 2.1736e-04
Loss = 5.6242e-03, PNorm = 182.0852, GNorm = 0.1909, lr_0 = 2.1721e-04
Loss = 7.0068e-03, PNorm = 182.0898, GNorm = 0.0984, lr_0 = 2.1706e-04
Loss = 4.3234e-03, PNorm = 182.0945, GNorm = 0.0974, lr_0 = 2.1691e-04
Loss = 5.9763e-03, PNorm = 182.0975, GNorm = 0.0774, lr_0 = 2.1676e-04
Loss = 1.0566e-02, PNorm = 182.1019, GNorm = 0.1712, lr_0 = 2.1661e-04
Loss = 9.5126e-03, PNorm = 182.1071, GNorm = 0.1197, lr_0 = 2.1646e-04
Loss = 7.6650e-03, PNorm = 182.1110, GNorm = 0.1544, lr_0 = 2.1632e-04
Loss = 4.6268e-03, PNorm = 182.1190, GNorm = 0.1164, lr_0 = 2.1617e-04
Loss = 5.5776e-03, PNorm = 182.1264, GNorm = 0.1878, lr_0 = 2.1602e-04
Loss = 4.6628e-03, PNorm = 182.1307, GNorm = 0.1775, lr_0 = 2.1587e-04
Loss = 5.3117e-03, PNorm = 182.1358, GNorm = 0.1745, lr_0 = 2.1572e-04
Loss = 9.0047e-03, PNorm = 182.1408, GNorm = 0.1332, lr_0 = 2.1558e-04
Loss = 5.1232e-03, PNorm = 182.1458, GNorm = 0.1912, lr_0 = 2.1543e-04
Loss = 5.4432e-03, PNorm = 182.1495, GNorm = 0.2702, lr_0 = 2.1528e-04
Loss = 6.1775e-03, PNorm = 182.1547, GNorm = 0.1318, lr_0 = 2.1513e-04
Loss = 5.5287e-03, PNorm = 182.1602, GNorm = 0.3737, lr_0 = 2.1499e-04
Loss = 8.7718e-03, PNorm = 182.1642, GNorm = 0.1074, lr_0 = 2.1484e-04
Loss = 8.0532e-03, PNorm = 182.1694, GNorm = 1.0715, lr_0 = 2.1469e-04
Loss = 4.8833e-03, PNorm = 182.1761, GNorm = 0.1506, lr_0 = 2.1454e-04
Loss = 3.7443e-03, PNorm = 182.1815, GNorm = 0.0996, lr_0 = 2.1440e-04
Loss = 4.8800e-03, PNorm = 182.1878, GNorm = 0.3539, lr_0 = 2.1425e-04
Loss = 9.1271e-03, PNorm = 182.1948, GNorm = 0.1109, lr_0 = 2.1410e-04
Loss = 6.3861e-03, PNorm = 182.2002, GNorm = 0.1318, lr_0 = 2.1396e-04
Loss = 8.7438e-03, PNorm = 182.2049, GNorm = 0.2056, lr_0 = 2.1381e-04
Loss = 7.4698e-03, PNorm = 182.2116, GNorm = 0.3122, lr_0 = 2.1366e-04
Loss = 5.2748e-03, PNorm = 182.2193, GNorm = 0.1396, lr_0 = 2.1352e-04
Loss = 5.7013e-03, PNorm = 182.2264, GNorm = 0.3439, lr_0 = 2.1337e-04
Loss = 4.3595e-03, PNorm = 182.2304, GNorm = 0.1304, lr_0 = 2.1323e-04
Loss = 3.4911e-03, PNorm = 182.2354, GNorm = 0.0762, lr_0 = 2.1308e-04
Loss = 1.3353e-02, PNorm = 182.2409, GNorm = 0.4135, lr_0 = 2.1293e-04
Loss = 4.4519e-03, PNorm = 182.2474, GNorm = 0.2057, lr_0 = 2.1279e-04
Loss = 4.4108e-03, PNorm = 182.2536, GNorm = 0.1315, lr_0 = 2.1264e-04
Loss = 7.7907e-03, PNorm = 182.2584, GNorm = 0.2494, lr_0 = 2.1250e-04
Loss = 7.2580e-03, PNorm = 182.2647, GNorm = 0.1749, lr_0 = 2.1235e-04
Loss = 1.0243e-02, PNorm = 182.2700, GNorm = 0.1542, lr_0 = 2.1221e-04
Loss = 7.9789e-03, PNorm = 182.2734, GNorm = 0.0715, lr_0 = 2.1206e-04
Loss = 5.5746e-03, PNorm = 182.2775, GNorm = 0.3215, lr_0 = 2.1191e-04
Loss = 4.4941e-03, PNorm = 182.2843, GNorm = 0.2491, lr_0 = 2.1177e-04
Loss = 1.2842e-02, PNorm = 182.2907, GNorm = 0.1033, lr_0 = 2.1162e-04
Loss = 5.2749e-03, PNorm = 182.2965, GNorm = 0.3364, lr_0 = 2.1148e-04
Loss = 5.6770e-03, PNorm = 182.3038, GNorm = 0.1333, lr_0 = 2.1133e-04
Loss = 9.7843e-03, PNorm = 182.3120, GNorm = 1.6656, lr_0 = 2.1119e-04
Loss = 6.4371e-03, PNorm = 182.3185, GNorm = 0.5403, lr_0 = 2.1104e-04
Loss = 4.5742e-03, PNorm = 182.3262, GNorm = 0.2061, lr_0 = 2.1090e-04
Loss = 4.6070e-03, PNorm = 182.3325, GNorm = 0.2938, lr_0 = 2.1076e-04
Loss = 4.0210e-03, PNorm = 182.3417, GNorm = 0.1014, lr_0 = 2.1061e-04
Loss = 7.6401e-03, PNorm = 182.3482, GNorm = 0.1926, lr_0 = 2.1047e-04
Loss = 6.2030e-03, PNorm = 182.3544, GNorm = 0.2210, lr_0 = 2.1032e-04
Loss = 9.1422e-03, PNorm = 182.3606, GNorm = 0.2959, lr_0 = 2.1018e-04
Loss = 8.6060e-03, PNorm = 182.3659, GNorm = 0.0827, lr_0 = 2.1003e-04
Loss = 5.6185e-03, PNorm = 182.3717, GNorm = 0.2969, lr_0 = 2.0989e-04
Loss = 9.9376e-03, PNorm = 182.3775, GNorm = 0.1842, lr_0 = 2.0975e-04
Loss = 7.7071e-03, PNorm = 182.3829, GNorm = 0.2411, lr_0 = 2.0960e-04
Validation mae = 0.120938
Epoch 21
Loss = 3.7312e-03, PNorm = 182.3882, GNorm = 0.1411, lr_0 = 2.0946e-04
Loss = 4.2212e-03, PNorm = 182.3912, GNorm = 0.1069, lr_0 = 2.0932e-04
Loss = 4.4537e-03, PNorm = 182.3957, GNorm = 0.1285, lr_0 = 2.0917e-04
Loss = 8.1692e-03, PNorm = 182.4006, GNorm = 0.2390, lr_0 = 2.0903e-04
Loss = 6.6949e-03, PNorm = 182.4033, GNorm = 0.1466, lr_0 = 2.0889e-04
Loss = 4.6609e-03, PNorm = 182.4093, GNorm = 0.3338, lr_0 = 2.0874e-04
Loss = 3.4444e-03, PNorm = 182.4132, GNorm = 0.1764, lr_0 = 2.0860e-04
Loss = 6.3782e-03, PNorm = 182.4174, GNorm = 0.1340, lr_0 = 2.0846e-04
Loss = 4.7164e-03, PNorm = 182.4239, GNorm = 0.1399, lr_0 = 2.0831e-04
Loss = 5.6282e-03, PNorm = 182.4289, GNorm = 0.1232, lr_0 = 2.0817e-04
Loss = 3.7262e-03, PNorm = 182.4334, GNorm = 0.1823, lr_0 = 2.0803e-04
Loss = 5.1603e-03, PNorm = 182.4369, GNorm = 0.0846, lr_0 = 2.0789e-04
Loss = 4.3942e-03, PNorm = 182.4415, GNorm = 0.0759, lr_0 = 2.0774e-04
Loss = 5.2706e-03, PNorm = 182.4470, GNorm = 0.1906, lr_0 = 2.0760e-04
Loss = 3.8554e-03, PNorm = 182.4523, GNorm = 0.1183, lr_0 = 2.0746e-04
Loss = 4.3378e-03, PNorm = 182.4560, GNorm = 0.4230, lr_0 = 2.0732e-04
Loss = 5.9968e-03, PNorm = 182.4600, GNorm = 0.1190, lr_0 = 2.0718e-04
Loss = 6.0746e-03, PNorm = 182.4640, GNorm = 0.1769, lr_0 = 2.0703e-04
Loss = 5.0695e-03, PNorm = 182.4674, GNorm = 0.1084, lr_0 = 2.0689e-04
Loss = 3.2948e-03, PNorm = 182.4713, GNorm = 0.2116, lr_0 = 2.0675e-04
Loss = 7.7800e-03, PNorm = 182.4751, GNorm = 0.1823, lr_0 = 2.0661e-04
Loss = 3.8780e-03, PNorm = 182.4777, GNorm = 0.1228, lr_0 = 2.0647e-04
Loss = 4.9458e-03, PNorm = 182.4810, GNorm = 0.1490, lr_0 = 2.0633e-04
Loss = 3.4180e-03, PNorm = 182.4851, GNorm = 0.1338, lr_0 = 2.0618e-04
Loss = 7.4390e-03, PNorm = 182.4888, GNorm = 0.2673, lr_0 = 2.0604e-04
Loss = 4.2376e-03, PNorm = 182.4933, GNorm = 0.2040, lr_0 = 2.0590e-04
Loss = 5.3690e-03, PNorm = 182.4965, GNorm = 0.2781, lr_0 = 2.0576e-04
Loss = 1.3077e-02, PNorm = 182.4982, GNorm = 0.2118, lr_0 = 2.0562e-04
Loss = 3.7596e-03, PNorm = 182.5023, GNorm = 0.2231, lr_0 = 2.0548e-04
Loss = 3.4033e-03, PNorm = 182.5076, GNorm = 0.1838, lr_0 = 2.0534e-04
Loss = 3.9856e-03, PNorm = 182.5102, GNorm = 0.1413, lr_0 = 2.0520e-04
Loss = 3.5501e-03, PNorm = 182.5147, GNorm = 0.1008, lr_0 = 2.0506e-04
Loss = 4.4961e-03, PNorm = 182.5175, GNorm = 0.1121, lr_0 = 2.0492e-04
Loss = 4.5181e-03, PNorm = 182.5205, GNorm = 0.4452, lr_0 = 2.0478e-04
Loss = 4.1266e-03, PNorm = 182.5267, GNorm = 0.1218, lr_0 = 2.0464e-04
Loss = 4.3444e-03, PNorm = 182.5306, GNorm = 0.1822, lr_0 = 2.0450e-04
Loss = 1.0147e-02, PNorm = 182.5361, GNorm = 0.1623, lr_0 = 2.0436e-04
Loss = 4.4380e-03, PNorm = 182.5430, GNorm = 0.0687, lr_0 = 2.0422e-04
Loss = 3.9186e-03, PNorm = 182.5472, GNorm = 0.1312, lr_0 = 2.0408e-04
Loss = 5.4202e-03, PNorm = 182.5513, GNorm = 0.1192, lr_0 = 2.0394e-04
Loss = 3.0132e-03, PNorm = 182.5559, GNorm = 0.1145, lr_0 = 2.0380e-04
Loss = 3.4363e-03, PNorm = 182.5604, GNorm = 0.2809, lr_0 = 2.0366e-04
Loss = 5.0002e-03, PNorm = 182.5638, GNorm = 0.0683, lr_0 = 2.0352e-04
Loss = 8.2979e-03, PNorm = 182.5674, GNorm = 0.2296, lr_0 = 2.0338e-04
Loss = 5.0894e-03, PNorm = 182.5722, GNorm = 0.0612, lr_0 = 2.0324e-04
Loss = 5.2462e-03, PNorm = 182.5776, GNorm = 0.1444, lr_0 = 2.0310e-04
Loss = 8.3576e-03, PNorm = 182.5822, GNorm = 0.2896, lr_0 = 2.0296e-04
Loss = 6.4109e-03, PNorm = 182.5848, GNorm = 0.1838, lr_0 = 2.0282e-04
Loss = 5.5102e-03, PNorm = 182.5905, GNorm = 0.0895, lr_0 = 2.0268e-04
Loss = 8.0343e-03, PNorm = 182.5956, GNorm = 0.5709, lr_0 = 2.0254e-04
Loss = 4.9852e-03, PNorm = 182.5997, GNorm = 0.1630, lr_0 = 2.0240e-04
Loss = 3.8581e-03, PNorm = 182.6053, GNorm = 0.1096, lr_0 = 2.0227e-04
Loss = 3.6556e-03, PNorm = 182.6116, GNorm = 0.1359, lr_0 = 2.0213e-04
Loss = 7.0689e-03, PNorm = 182.6175, GNorm = 0.1518, lr_0 = 2.0199e-04
Loss = 8.2099e-03, PNorm = 182.6224, GNorm = 0.1971, lr_0 = 2.0185e-04
Loss = 7.5749e-03, PNorm = 182.6254, GNorm = 0.1253, lr_0 = 2.0171e-04
Loss = 4.7375e-03, PNorm = 182.6294, GNorm = 0.2655, lr_0 = 2.0157e-04
Loss = 7.9589e-03, PNorm = 182.6345, GNorm = 0.2464, lr_0 = 2.0144e-04
Loss = 5.9299e-03, PNorm = 182.6387, GNorm = 0.2152, lr_0 = 2.0130e-04
Loss = 3.2576e-03, PNorm = 182.6417, GNorm = 0.1706, lr_0 = 2.0116e-04
Loss = 3.7798e-03, PNorm = 182.6461, GNorm = 0.1067, lr_0 = 2.0102e-04
Loss = 5.5118e-03, PNorm = 182.6496, GNorm = 0.1563, lr_0 = 2.0088e-04
Loss = 3.2674e-03, PNorm = 182.6558, GNorm = 0.2625, lr_0 = 2.0075e-04
Loss = 4.1254e-03, PNorm = 182.6615, GNorm = 0.1241, lr_0 = 2.0061e-04
Loss = 5.1489e-03, PNorm = 182.6664, GNorm = 0.2722, lr_0 = 2.0047e-04
Loss = 9.8554e-03, PNorm = 182.6693, GNorm = 0.5209, lr_0 = 2.0033e-04
Loss = 6.7612e-03, PNorm = 182.6741, GNorm = 0.1811, lr_0 = 2.0020e-04
Loss = 5.5972e-03, PNorm = 182.6792, GNorm = 0.1928, lr_0 = 2.0006e-04
Loss = 9.4768e-03, PNorm = 182.6837, GNorm = 0.2559, lr_0 = 1.9992e-04
Loss = 5.9445e-03, PNorm = 182.6875, GNorm = 0.2317, lr_0 = 1.9979e-04
Loss = 5.4610e-03, PNorm = 182.6907, GNorm = 0.1844, lr_0 = 1.9965e-04
Loss = 7.5990e-03, PNorm = 182.6960, GNorm = 0.1960, lr_0 = 1.9951e-04
Loss = 5.4948e-03, PNorm = 182.7004, GNorm = 0.1599, lr_0 = 1.9938e-04
Loss = 5.2374e-03, PNorm = 182.7071, GNorm = 0.1727, lr_0 = 1.9924e-04
Loss = 3.9781e-03, PNorm = 182.7119, GNorm = 0.0999, lr_0 = 1.9910e-04
Loss = 3.3019e-03, PNorm = 182.7159, GNorm = 0.1148, lr_0 = 1.9897e-04
Loss = 3.5769e-03, PNorm = 182.7206, GNorm = 0.1129, lr_0 = 1.9883e-04
Loss = 8.0697e-03, PNorm = 182.7226, GNorm = 0.4282, lr_0 = 1.9869e-04
Loss = 1.2972e-02, PNorm = 182.7248, GNorm = 0.4784, lr_0 = 1.9856e-04
Loss = 3.9399e-03, PNorm = 182.7291, GNorm = 0.1700, lr_0 = 1.9842e-04
Loss = 5.9200e-03, PNorm = 182.7331, GNorm = 0.1193, lr_0 = 1.9829e-04
Loss = 4.1555e-03, PNorm = 182.7368, GNorm = 0.2513, lr_0 = 1.9815e-04
Loss = 8.4870e-03, PNorm = 182.7432, GNorm = 0.0922, lr_0 = 1.9801e-04
Loss = 7.3068e-03, PNorm = 182.7497, GNorm = 0.0921, lr_0 = 1.9788e-04
Loss = 3.3350e-03, PNorm = 182.7550, GNorm = 0.1302, lr_0 = 1.9774e-04
Loss = 4.0152e-03, PNorm = 182.7595, GNorm = 0.0780, lr_0 = 1.9761e-04
Loss = 3.5993e-03, PNorm = 182.7624, GNorm = 0.1102, lr_0 = 1.9747e-04
Loss = 4.2735e-03, PNorm = 182.7669, GNorm = 0.2315, lr_0 = 1.9734e-04
Loss = 8.1628e-03, PNorm = 182.7726, GNorm = 0.4906, lr_0 = 1.9720e-04
Loss = 3.4892e-03, PNorm = 182.7785, GNorm = 0.0715, lr_0 = 1.9707e-04
Loss = 8.6674e-03, PNorm = 182.7829, GNorm = 0.1415, lr_0 = 1.9693e-04
Loss = 6.9195e-03, PNorm = 182.7891, GNorm = 0.7410, lr_0 = 1.9680e-04
Loss = 7.8116e-03, PNorm = 182.7939, GNorm = 0.2492, lr_0 = 1.9666e-04
Loss = 5.6227e-03, PNorm = 182.7973, GNorm = 0.1199, lr_0 = 1.9653e-04
Loss = 3.9498e-03, PNorm = 182.8032, GNorm = 0.0564, lr_0 = 1.9639e-04
Loss = 5.1261e-03, PNorm = 182.8083, GNorm = 0.1585, lr_0 = 1.9626e-04
Loss = 4.8858e-03, PNorm = 182.8147, GNorm = 0.1004, lr_0 = 1.9612e-04
Loss = 4.8598e-03, PNorm = 182.8220, GNorm = 0.1383, lr_0 = 1.9599e-04
Loss = 2.1488e-02, PNorm = 182.8281, GNorm = 0.2861, lr_0 = 1.9585e-04
Loss = 8.0775e-03, PNorm = 182.8310, GNorm = 0.1576, lr_0 = 1.9572e-04
Loss = 4.1498e-03, PNorm = 182.8348, GNorm = 0.0792, lr_0 = 1.9559e-04
Loss = 8.2567e-03, PNorm = 182.8407, GNorm = 0.1338, lr_0 = 1.9545e-04
Loss = 8.0161e-03, PNorm = 182.8475, GNorm = 0.1256, lr_0 = 1.9532e-04
Loss = 3.7702e-03, PNorm = 182.8541, GNorm = 0.2850, lr_0 = 1.9518e-04
Loss = 3.8130e-03, PNorm = 182.8590, GNorm = 0.2266, lr_0 = 1.9505e-04
Loss = 7.0791e-03, PNorm = 182.8655, GNorm = 0.1606, lr_0 = 1.9492e-04
Loss = 7.0383e-03, PNorm = 182.8699, GNorm = 1.0041, lr_0 = 1.9478e-04
Loss = 5.9078e-03, PNorm = 182.8728, GNorm = 0.1771, lr_0 = 1.9465e-04
Loss = 6.5719e-03, PNorm = 182.8765, GNorm = 0.2649, lr_0 = 1.9452e-04
Loss = 6.2412e-03, PNorm = 182.8808, GNorm = 0.1719, lr_0 = 1.9438e-04
Loss = 5.1646e-03, PNorm = 182.8859, GNorm = 0.1183, lr_0 = 1.9425e-04
Loss = 1.2237e-02, PNorm = 182.8904, GNorm = 0.0910, lr_0 = 1.9412e-04
Loss = 6.1631e-03, PNorm = 182.8932, GNorm = 0.1490, lr_0 = 1.9398e-04
Loss = 4.3767e-03, PNorm = 182.8970, GNorm = 0.1798, lr_0 = 1.9385e-04
Loss = 9.0603e-03, PNorm = 182.9034, GNorm = 0.4485, lr_0 = 1.9372e-04
Loss = 7.2731e-03, PNorm = 182.9049, GNorm = 0.2631, lr_0 = 1.9359e-04
Loss = 6.8260e-03, PNorm = 182.9097, GNorm = 0.1815, lr_0 = 1.9345e-04
Loss = 7.2777e-03, PNorm = 182.9154, GNorm = 0.1647, lr_0 = 1.9332e-04
Loss = 4.8678e-03, PNorm = 182.9195, GNorm = 0.1050, lr_0 = 1.9319e-04
Loss = 3.5020e-03, PNorm = 182.9225, GNorm = 0.1519, lr_0 = 1.9306e-04
Validation mae = 0.120847
Epoch 22
Loss = 8.2088e-03, PNorm = 182.9244, GNorm = 0.3523, lr_0 = 1.9292e-04
Loss = 3.8986e-03, PNorm = 182.9267, GNorm = 0.0925, lr_0 = 1.9279e-04
Loss = 4.9822e-03, PNorm = 182.9290, GNorm = 0.2302, lr_0 = 1.9266e-04
Loss = 7.0341e-03, PNorm = 182.9333, GNorm = 0.1918, lr_0 = 1.9253e-04
Loss = 7.0287e-03, PNorm = 182.9381, GNorm = 0.2075, lr_0 = 1.9240e-04
Loss = 3.3971e-03, PNorm = 182.9413, GNorm = 0.1369, lr_0 = 1.9226e-04
Loss = 4.9106e-03, PNorm = 182.9442, GNorm = 0.2522, lr_0 = 1.9213e-04
Loss = 4.1453e-03, PNorm = 182.9491, GNorm = 0.0972, lr_0 = 1.9200e-04
Loss = 5.3000e-03, PNorm = 182.9527, GNorm = 0.1079, lr_0 = 1.9187e-04
Loss = 4.0530e-03, PNorm = 182.9548, GNorm = 0.2701, lr_0 = 1.9174e-04
Loss = 3.9854e-03, PNorm = 182.9585, GNorm = 0.2849, lr_0 = 1.9161e-04
Loss = 5.2171e-03, PNorm = 182.9616, GNorm = 0.1804, lr_0 = 1.9148e-04
Loss = 3.7231e-03, PNorm = 182.9656, GNorm = 0.0523, lr_0 = 1.9134e-04
Loss = 4.2319e-03, PNorm = 182.9699, GNorm = 0.3307, lr_0 = 1.9121e-04
Loss = 3.4339e-03, PNorm = 182.9742, GNorm = 0.1005, lr_0 = 1.9108e-04
Loss = 3.5124e-03, PNorm = 182.9790, GNorm = 0.1652, lr_0 = 1.9095e-04
Loss = 4.2302e-03, PNorm = 182.9843, GNorm = 0.2131, lr_0 = 1.9082e-04
Loss = 4.2698e-03, PNorm = 182.9895, GNorm = 0.1534, lr_0 = 1.9069e-04
Loss = 2.9551e-03, PNorm = 182.9939, GNorm = 0.0652, lr_0 = 1.9056e-04
Loss = 2.6060e-03, PNorm = 182.9981, GNorm = 0.1524, lr_0 = 1.9043e-04
Loss = 8.7647e-03, PNorm = 183.0014, GNorm = 0.5210, lr_0 = 1.9030e-04
Loss = 4.5533e-03, PNorm = 183.0055, GNorm = 0.0629, lr_0 = 1.9017e-04
Loss = 3.7632e-03, PNorm = 183.0108, GNorm = 0.1544, lr_0 = 1.9004e-04
Loss = 3.2055e-03, PNorm = 183.0142, GNorm = 0.1996, lr_0 = 1.8991e-04
Loss = 3.0954e-03, PNorm = 183.0167, GNorm = 0.0699, lr_0 = 1.8978e-04
Loss = 5.1198e-03, PNorm = 183.0181, GNorm = 0.0574, lr_0 = 1.8965e-04
Loss = 3.6009e-03, PNorm = 183.0211, GNorm = 0.1562, lr_0 = 1.8952e-04
Loss = 3.5518e-03, PNorm = 183.0255, GNorm = 0.1419, lr_0 = 1.8939e-04
Loss = 6.7843e-03, PNorm = 183.0303, GNorm = 0.1748, lr_0 = 1.8926e-04
Loss = 4.7299e-03, PNorm = 183.0335, GNorm = 0.2866, lr_0 = 1.8913e-04
Loss = 7.8984e-03, PNorm = 183.0387, GNorm = 0.1156, lr_0 = 1.8900e-04
Loss = 4.1920e-03, PNorm = 183.0417, GNorm = 0.1680, lr_0 = 1.8887e-04
Loss = 5.1038e-03, PNorm = 183.0447, GNorm = 0.1375, lr_0 = 1.8874e-04
Loss = 2.9552e-03, PNorm = 183.0482, GNorm = 0.0949, lr_0 = 1.8861e-04
Loss = 4.5447e-03, PNorm = 183.0549, GNorm = 0.1217, lr_0 = 1.8848e-04
Loss = 3.7560e-03, PNorm = 183.0601, GNorm = 0.1885, lr_0 = 1.8835e-04
Loss = 5.7932e-03, PNorm = 183.0639, GNorm = 0.1960, lr_0 = 1.8822e-04
Loss = 4.7724e-03, PNorm = 183.0648, GNorm = 0.1761, lr_0 = 1.8809e-04
Loss = 3.7252e-03, PNorm = 183.0669, GNorm = 0.0815, lr_0 = 1.8797e-04
Loss = 5.1574e-03, PNorm = 183.0704, GNorm = 0.1367, lr_0 = 1.8784e-04
Loss = 6.7322e-03, PNorm = 183.0728, GNorm = 0.1343, lr_0 = 1.8771e-04
Loss = 8.1565e-03, PNorm = 183.0756, GNorm = 0.1209, lr_0 = 1.8758e-04
Loss = 5.1213e-03, PNorm = 183.0801, GNorm = 0.0753, lr_0 = 1.8745e-04
Loss = 7.3784e-03, PNorm = 183.0849, GNorm = 0.2490, lr_0 = 1.8732e-04
Loss = 7.8110e-03, PNorm = 183.0887, GNorm = 0.0588, lr_0 = 1.8719e-04
Loss = 5.0011e-03, PNorm = 183.0919, GNorm = 0.2452, lr_0 = 1.8707e-04
Loss = 3.0651e-03, PNorm = 183.0961, GNorm = 0.0935, lr_0 = 1.8694e-04
Loss = 3.5796e-03, PNorm = 183.1007, GNorm = 0.1976, lr_0 = 1.8681e-04
Loss = 4.4709e-03, PNorm = 183.1041, GNorm = 0.1913, lr_0 = 1.8668e-04
Loss = 5.4112e-03, PNorm = 183.1081, GNorm = 0.1463, lr_0 = 1.8655e-04
Loss = 4.6464e-03, PNorm = 183.1100, GNorm = 0.1050, lr_0 = 1.8643e-04
Loss = 3.2994e-03, PNorm = 183.1132, GNorm = 0.2672, lr_0 = 1.8630e-04
Loss = 1.2591e-02, PNorm = 183.1197, GNorm = 0.0778, lr_0 = 1.8617e-04
Loss = 6.3794e-03, PNorm = 183.1236, GNorm = 0.2444, lr_0 = 1.8604e-04
Loss = 3.2416e-03, PNorm = 183.1273, GNorm = 0.1551, lr_0 = 1.8592e-04
Loss = 2.9252e-03, PNorm = 183.1324, GNorm = 0.2196, lr_0 = 1.8579e-04
Loss = 6.5961e-03, PNorm = 183.1337, GNorm = 0.2901, lr_0 = 1.8566e-04
Loss = 5.5264e-03, PNorm = 183.1390, GNorm = 0.1303, lr_0 = 1.8553e-04
Loss = 3.8005e-03, PNorm = 183.1434, GNorm = 0.1077, lr_0 = 1.8541e-04
Loss = 3.1439e-03, PNorm = 183.1483, GNorm = 0.1481, lr_0 = 1.8528e-04
Loss = 4.3172e-03, PNorm = 183.1526, GNorm = 0.2058, lr_0 = 1.8515e-04
Loss = 9.8427e-03, PNorm = 183.1559, GNorm = 1.1567, lr_0 = 1.8503e-04
Loss = 6.5552e-03, PNorm = 183.1599, GNorm = 0.1050, lr_0 = 1.8490e-04
Loss = 4.6333e-03, PNorm = 183.1669, GNorm = 0.1886, lr_0 = 1.8477e-04
Loss = 3.9554e-03, PNorm = 183.1720, GNorm = 0.1482, lr_0 = 1.8465e-04
Loss = 7.1710e-03, PNorm = 183.1760, GNorm = 0.1659, lr_0 = 1.8452e-04
Loss = 7.7460e-03, PNorm = 183.1797, GNorm = 0.1123, lr_0 = 1.8439e-04
Loss = 8.6236e-03, PNorm = 183.1842, GNorm = 0.1447, lr_0 = 1.8427e-04
Loss = 3.2994e-03, PNorm = 183.1887, GNorm = 0.1110, lr_0 = 1.8414e-04
Loss = 3.7015e-03, PNorm = 183.1944, GNorm = 0.1332, lr_0 = 1.8401e-04
Loss = 5.2224e-03, PNorm = 183.1995, GNorm = 0.2788, lr_0 = 1.8389e-04
Loss = 5.8980e-03, PNorm = 183.2033, GNorm = 0.1045, lr_0 = 1.8376e-04
Loss = 3.8982e-03, PNorm = 183.2059, GNorm = 0.0881, lr_0 = 1.8364e-04
Loss = 3.6457e-03, PNorm = 183.2094, GNorm = 0.2392, lr_0 = 1.8351e-04
Loss = 3.3796e-03, PNorm = 183.2122, GNorm = 0.0952, lr_0 = 1.8338e-04
Loss = 2.8454e-03, PNorm = 183.2163, GNorm = 0.1156, lr_0 = 1.8326e-04
Loss = 5.0716e-03, PNorm = 183.2212, GNorm = 0.2137, lr_0 = 1.8313e-04
Loss = 4.3291e-03, PNorm = 183.2255, GNorm = 0.1842, lr_0 = 1.8301e-04
Loss = 4.9838e-03, PNorm = 183.2316, GNorm = 0.2242, lr_0 = 1.8288e-04
Loss = 5.3600e-03, PNorm = 183.2362, GNorm = 0.1386, lr_0 = 1.8276e-04
Loss = 7.3669e-03, PNorm = 183.2419, GNorm = 0.1621, lr_0 = 1.8263e-04
Loss = 3.8676e-03, PNorm = 183.2466, GNorm = 0.1643, lr_0 = 1.8251e-04
Loss = 4.8332e-03, PNorm = 183.2505, GNorm = 0.1260, lr_0 = 1.8238e-04
Loss = 2.9468e-03, PNorm = 183.2541, GNorm = 0.2392, lr_0 = 1.8226e-04
Loss = 2.9365e-03, PNorm = 183.2563, GNorm = 0.1575, lr_0 = 1.8213e-04
Loss = 4.8346e-03, PNorm = 183.2586, GNorm = 0.1831, lr_0 = 1.8201e-04
Loss = 4.1444e-03, PNorm = 183.2632, GNorm = 0.0693, lr_0 = 1.8188e-04
Loss = 7.9243e-03, PNorm = 183.2677, GNorm = 0.1234, lr_0 = 1.8176e-04
Loss = 5.7143e-03, PNorm = 183.2723, GNorm = 0.1549, lr_0 = 1.8163e-04
Loss = 3.9414e-03, PNorm = 183.2756, GNorm = 0.1565, lr_0 = 1.8151e-04
Loss = 5.2148e-03, PNorm = 183.2791, GNorm = 0.1942, lr_0 = 1.8138e-04
Loss = 4.2487e-03, PNorm = 183.2831, GNorm = 0.2804, lr_0 = 1.8126e-04
Loss = 3.0751e-03, PNorm = 183.2880, GNorm = 0.1284, lr_0 = 1.8114e-04
Loss = 5.7135e-03, PNorm = 183.2928, GNorm = 0.1227, lr_0 = 1.8101e-04
Loss = 4.3382e-03, PNorm = 183.2975, GNorm = 0.1298, lr_0 = 1.8089e-04
Loss = 5.7002e-03, PNorm = 183.3027, GNorm = 0.1724, lr_0 = 1.8076e-04
Loss = 4.5625e-03, PNorm = 183.3057, GNorm = 0.0788, lr_0 = 1.8064e-04
Loss = 2.4513e-03, PNorm = 183.3073, GNorm = 0.1971, lr_0 = 1.8052e-04
Loss = 6.6146e-03, PNorm = 183.3097, GNorm = 0.6189, lr_0 = 1.8039e-04
Loss = 6.2855e-03, PNorm = 183.3131, GNorm = 0.2796, lr_0 = 1.8027e-04
Loss = 1.4920e-02, PNorm = 183.3175, GNorm = 0.3648, lr_0 = 1.8015e-04
Loss = 2.8830e-03, PNorm = 183.3220, GNorm = 0.0978, lr_0 = 1.8002e-04
Loss = 7.0720e-03, PNorm = 183.3288, GNorm = 0.3465, lr_0 = 1.7990e-04
Loss = 6.1253e-03, PNorm = 183.3368, GNorm = 0.3159, lr_0 = 1.7978e-04
Loss = 7.7818e-03, PNorm = 183.3430, GNorm = 0.0992, lr_0 = 1.7965e-04
Loss = 4.6300e-03, PNorm = 183.3485, GNorm = 0.1793, lr_0 = 1.7953e-04
Loss = 3.7331e-03, PNorm = 183.3525, GNorm = 0.0808, lr_0 = 1.7941e-04
Loss = 5.8353e-03, PNorm = 183.3570, GNorm = 0.1062, lr_0 = 1.7928e-04
Loss = 3.5325e-03, PNorm = 183.3598, GNorm = 0.2055, lr_0 = 1.7916e-04
Loss = 9.5536e-03, PNorm = 183.3627, GNorm = 0.9379, lr_0 = 1.7904e-04
Loss = 3.0911e-03, PNorm = 183.3659, GNorm = 0.1061, lr_0 = 1.7892e-04
Loss = 8.0144e-03, PNorm = 183.3706, GNorm = 0.1727, lr_0 = 1.7879e-04
Loss = 5.8581e-03, PNorm = 183.3730, GNorm = 0.1553, lr_0 = 1.7867e-04
Loss = 5.0657e-03, PNorm = 183.3757, GNorm = 0.1257, lr_0 = 1.7855e-04
Loss = 4.9000e-03, PNorm = 183.3775, GNorm = 0.1223, lr_0 = 1.7843e-04
Loss = 4.3611e-03, PNorm = 183.3810, GNorm = 0.1149, lr_0 = 1.7830e-04
Loss = 1.1220e-02, PNorm = 183.3884, GNorm = 0.1115, lr_0 = 1.7818e-04
Loss = 3.2436e-03, PNorm = 183.3935, GNorm = 0.1262, lr_0 = 1.7806e-04
Loss = 4.5551e-03, PNorm = 183.3984, GNorm = 0.2622, lr_0 = 1.7794e-04
Loss = 1.0631e-02, PNorm = 183.4030, GNorm = 0.1181, lr_0 = 1.7782e-04
Validation mae = 0.120736
Epoch 23
Loss = 6.1527e-03, PNorm = 183.4059, GNorm = 0.3386, lr_0 = 1.7769e-04
Loss = 2.8022e-03, PNorm = 183.4084, GNorm = 0.0759, lr_0 = 1.7757e-04
Loss = 5.1382e-03, PNorm = 183.4117, GNorm = 0.1410, lr_0 = 1.7745e-04
Loss = 3.7392e-03, PNorm = 183.4164, GNorm = 0.0667, lr_0 = 1.7733e-04
Loss = 3.7532e-03, PNorm = 183.4190, GNorm = 0.1370, lr_0 = 1.7721e-04
Loss = 3.3909e-03, PNorm = 183.4211, GNorm = 0.1880, lr_0 = 1.7709e-04
Loss = 2.8862e-03, PNorm = 183.4237, GNorm = 0.1100, lr_0 = 1.7696e-04
Loss = 3.5010e-03, PNorm = 183.4268, GNorm = 0.1202, lr_0 = 1.7684e-04
Loss = 2.7145e-03, PNorm = 183.4294, GNorm = 0.1107, lr_0 = 1.7672e-04
Loss = 6.0197e-03, PNorm = 183.4312, GNorm = 0.1134, lr_0 = 1.7660e-04
Loss = 3.7887e-03, PNorm = 183.4325, GNorm = 0.1272, lr_0 = 1.7648e-04
Loss = 9.8313e-03, PNorm = 183.4371, GNorm = 0.1392, lr_0 = 1.7636e-04
Loss = 3.9000e-03, PNorm = 183.4380, GNorm = 0.1036, lr_0 = 1.7624e-04
Loss = 7.2035e-03, PNorm = 183.4418, GNorm = 0.2466, lr_0 = 1.7612e-04
Loss = 4.4946e-03, PNorm = 183.4461, GNorm = 0.0738, lr_0 = 1.7600e-04
Loss = 2.6212e-03, PNorm = 183.4500, GNorm = 0.0836, lr_0 = 1.7588e-04
Loss = 4.8924e-03, PNorm = 183.4537, GNorm = 0.0756, lr_0 = 1.7576e-04
Loss = 7.3540e-03, PNorm = 183.4575, GNorm = 0.2239, lr_0 = 1.7564e-04
Loss = 3.7097e-03, PNorm = 183.4609, GNorm = 0.1463, lr_0 = 1.7552e-04
Loss = 4.6364e-03, PNorm = 183.4626, GNorm = 0.2289, lr_0 = 1.7540e-04
Loss = 4.4499e-03, PNorm = 183.4655, GNorm = 0.1898, lr_0 = 1.7528e-04
Loss = 4.7746e-03, PNorm = 183.4705, GNorm = 0.0856, lr_0 = 1.7516e-04
Loss = 2.7799e-03, PNorm = 183.4741, GNorm = 0.1678, lr_0 = 1.7504e-04
Loss = 4.7339e-03, PNorm = 183.4774, GNorm = 0.1385, lr_0 = 1.7492e-04
Loss = 3.4375e-03, PNorm = 183.4797, GNorm = 0.4454, lr_0 = 1.7480e-04
Loss = 4.2809e-03, PNorm = 183.4826, GNorm = 0.0792, lr_0 = 1.7468e-04
Loss = 4.5058e-03, PNorm = 183.4843, GNorm = 0.2162, lr_0 = 1.7456e-04
Loss = 2.2558e-03, PNorm = 183.4855, GNorm = 0.0742, lr_0 = 1.7444e-04
Loss = 2.8265e-03, PNorm = 183.4868, GNorm = 0.1915, lr_0 = 1.7432e-04
Loss = 4.7131e-03, PNorm = 183.4903, GNorm = 0.1729, lr_0 = 1.7420e-04
Loss = 5.0204e-03, PNorm = 183.4918, GNorm = 0.1334, lr_0 = 1.7408e-04
Loss = 2.6099e-03, PNorm = 183.4933, GNorm = 0.1407, lr_0 = 1.7396e-04
Loss = 3.4016e-03, PNorm = 183.4961, GNorm = 0.2781, lr_0 = 1.7384e-04
Loss = 4.6569e-03, PNorm = 183.4990, GNorm = 0.1140, lr_0 = 1.7372e-04
Loss = 3.6640e-03, PNorm = 183.5018, GNorm = 0.3011, lr_0 = 1.7360e-04
Loss = 4.9512e-03, PNorm = 183.5047, GNorm = 0.2916, lr_0 = 1.7348e-04
Loss = 2.6589e-03, PNorm = 183.5068, GNorm = 0.1447, lr_0 = 1.7336e-04
Loss = 4.6650e-03, PNorm = 183.5081, GNorm = 0.1884, lr_0 = 1.7325e-04
Loss = 5.3083e-03, PNorm = 183.5099, GNorm = 0.0593, lr_0 = 1.7313e-04
Loss = 7.6019e-03, PNorm = 183.5125, GNorm = 0.8466, lr_0 = 1.7301e-04
Loss = 5.0103e-03, PNorm = 183.5143, GNorm = 0.3267, lr_0 = 1.7289e-04
Loss = 5.1458e-03, PNorm = 183.5173, GNorm = 0.1647, lr_0 = 1.7277e-04
Loss = 5.2592e-03, PNorm = 183.5183, GNorm = 0.2131, lr_0 = 1.7265e-04
Loss = 4.3281e-03, PNorm = 183.5221, GNorm = 0.0834, lr_0 = 1.7253e-04
Loss = 5.2958e-03, PNorm = 183.5257, GNorm = 0.1471, lr_0 = 1.7242e-04
Loss = 3.2959e-03, PNorm = 183.5282, GNorm = 0.0962, lr_0 = 1.7230e-04
Loss = 3.1637e-03, PNorm = 183.5320, GNorm = 0.0478, lr_0 = 1.7218e-04
Loss = 5.7791e-03, PNorm = 183.5334, GNorm = 0.1636, lr_0 = 1.7206e-04
Loss = 5.3910e-03, PNorm = 183.5356, GNorm = 0.1750, lr_0 = 1.7194e-04
Loss = 4.0986e-03, PNorm = 183.5386, GNorm = 0.1906, lr_0 = 1.7183e-04
Loss = 4.2410e-03, PNorm = 183.5434, GNorm = 0.5440, lr_0 = 1.7171e-04
Loss = 3.3429e-03, PNorm = 183.5461, GNorm = 0.1048, lr_0 = 1.7159e-04
Loss = 8.5589e-03, PNorm = 183.5508, GNorm = 0.1060, lr_0 = 1.7147e-04
Loss = 7.8373e-03, PNorm = 183.5553, GNorm = 0.4827, lr_0 = 1.7136e-04
Loss = 7.2591e-03, PNorm = 183.5589, GNorm = 0.2013, lr_0 = 1.7124e-04
Loss = 4.7391e-03, PNorm = 183.5640, GNorm = 0.1300, lr_0 = 1.7112e-04
Loss = 7.4908e-03, PNorm = 183.5652, GNorm = 0.1449, lr_0 = 1.7100e-04
Loss = 2.2671e-03, PNorm = 183.5672, GNorm = 0.1552, lr_0 = 1.7089e-04
Loss = 4.9266e-03, PNorm = 183.5668, GNorm = 0.1465, lr_0 = 1.7077e-04
Loss = 4.2984e-03, PNorm = 183.5687, GNorm = 0.0690, lr_0 = 1.7065e-04
Loss = 2.2245e-03, PNorm = 183.5724, GNorm = 0.1390, lr_0 = 1.7054e-04
Loss = 3.0200e-03, PNorm = 183.5768, GNorm = 0.0602, lr_0 = 1.7042e-04
Loss = 8.6301e-03, PNorm = 183.5809, GNorm = 0.1867, lr_0 = 1.7030e-04
Loss = 7.9662e-03, PNorm = 183.5846, GNorm = 0.0954, lr_0 = 1.7019e-04
Loss = 3.2767e-03, PNorm = 183.5900, GNorm = 0.1248, lr_0 = 1.7007e-04
Loss = 4.9306e-03, PNorm = 183.5930, GNorm = 0.0642, lr_0 = 1.6995e-04
Loss = 6.3668e-03, PNorm = 183.5958, GNorm = 0.0884, lr_0 = 1.6984e-04
Loss = 3.6603e-03, PNorm = 183.5979, GNorm = 0.0955, lr_0 = 1.6972e-04
Loss = 2.3254e-03, PNorm = 183.6011, GNorm = 0.0578, lr_0 = 1.6960e-04
Loss = 3.9216e-03, PNorm = 183.6041, GNorm = 0.0821, lr_0 = 1.6949e-04
Loss = 2.5882e-03, PNorm = 183.6078, GNorm = 0.2105, lr_0 = 1.6937e-04
Loss = 4.4137e-03, PNorm = 183.6116, GNorm = 0.1444, lr_0 = 1.6926e-04
Loss = 3.0337e-03, PNorm = 183.6146, GNorm = 0.2431, lr_0 = 1.6914e-04
Loss = 9.9713e-03, PNorm = 183.6179, GNorm = 0.0669, lr_0 = 1.6902e-04
Loss = 6.7656e-03, PNorm = 183.6200, GNorm = 0.0842, lr_0 = 1.6891e-04
Loss = 2.5790e-03, PNorm = 183.6226, GNorm = 0.0847, lr_0 = 1.6879e-04
Loss = 2.7066e-03, PNorm = 183.6254, GNorm = 0.0402, lr_0 = 1.6868e-04
Loss = 4.4085e-03, PNorm = 183.6284, GNorm = 0.1354, lr_0 = 1.6856e-04
Loss = 3.4602e-03, PNorm = 183.6310, GNorm = 0.3290, lr_0 = 1.6845e-04
Loss = 4.4307e-03, PNorm = 183.6344, GNorm = 0.0870, lr_0 = 1.6833e-04
Loss = 2.3991e-03, PNorm = 183.6375, GNorm = 0.1460, lr_0 = 1.6821e-04
Loss = 2.3820e-03, PNorm = 183.6405, GNorm = 0.2208, lr_0 = 1.6810e-04
Loss = 3.3148e-03, PNorm = 183.6442, GNorm = 0.1284, lr_0 = 1.6798e-04
Loss = 7.6394e-03, PNorm = 183.6470, GNorm = 0.2032, lr_0 = 1.6787e-04
Loss = 4.8186e-03, PNorm = 183.6496, GNorm = 0.1181, lr_0 = 1.6775e-04
Loss = 4.9392e-03, PNorm = 183.6522, GNorm = 0.1031, lr_0 = 1.6764e-04
Loss = 2.9219e-03, PNorm = 183.6551, GNorm = 0.0841, lr_0 = 1.6752e-04
Loss = 5.0017e-03, PNorm = 183.6549, GNorm = 0.0801, lr_0 = 1.6741e-04
Loss = 6.4091e-03, PNorm = 183.6567, GNorm = 0.1121, lr_0 = 1.6729e-04
Loss = 2.9462e-03, PNorm = 183.6602, GNorm = 0.4041, lr_0 = 1.6718e-04
Loss = 3.6181e-03, PNorm = 183.6634, GNorm = 0.0875, lr_0 = 1.6707e-04
Loss = 7.2604e-03, PNorm = 183.6662, GNorm = 0.1760, lr_0 = 1.6695e-04
Loss = 5.0576e-03, PNorm = 183.6710, GNorm = 0.1490, lr_0 = 1.6684e-04
Loss = 7.1798e-03, PNorm = 183.6755, GNorm = 0.0884, lr_0 = 1.6672e-04
Loss = 5.0243e-03, PNorm = 183.6802, GNorm = 0.1168, lr_0 = 1.6661e-04
Loss = 5.5058e-03, PNorm = 183.6847, GNorm = 0.1885, lr_0 = 1.6649e-04
Loss = 3.9842e-03, PNorm = 183.6879, GNorm = 1.0808, lr_0 = 1.6638e-04
Loss = 2.1342e-03, PNorm = 183.6896, GNorm = 0.0934, lr_0 = 1.6627e-04
Loss = 8.7460e-03, PNorm = 183.6918, GNorm = 0.1433, lr_0 = 1.6615e-04
Loss = 2.8591e-03, PNorm = 183.6951, GNorm = 0.0998, lr_0 = 1.6604e-04
Loss = 5.0291e-03, PNorm = 183.6983, GNorm = 0.1836, lr_0 = 1.6592e-04
Loss = 2.6738e-03, PNorm = 183.7021, GNorm = 0.2069, lr_0 = 1.6581e-04
Loss = 3.0720e-03, PNorm = 183.7056, GNorm = 0.0541, lr_0 = 1.6570e-04
Loss = 5.4142e-03, PNorm = 183.7100, GNorm = 0.1976, lr_0 = 1.6558e-04
Loss = 6.7524e-03, PNorm = 183.7123, GNorm = 0.1314, lr_0 = 1.6547e-04
Loss = 7.9602e-03, PNorm = 183.7171, GNorm = 0.2810, lr_0 = 1.6536e-04
Loss = 3.1698e-03, PNorm = 183.7219, GNorm = 0.0907, lr_0 = 1.6524e-04
Loss = 3.5064e-03, PNorm = 183.7257, GNorm = 0.0620, lr_0 = 1.6513e-04
Loss = 4.2791e-03, PNorm = 183.7288, GNorm = 0.1136, lr_0 = 1.6502e-04
Loss = 3.3708e-03, PNorm = 183.7340, GNorm = 0.1104, lr_0 = 1.6490e-04
Loss = 3.3550e-03, PNorm = 183.7383, GNorm = 0.0990, lr_0 = 1.6479e-04
Loss = 2.7724e-03, PNorm = 183.7417, GNorm = 0.1673, lr_0 = 1.6468e-04
Loss = 7.0191e-03, PNorm = 183.7449, GNorm = 0.2193, lr_0 = 1.6457e-04
Loss = 5.7434e-03, PNorm = 183.7486, GNorm = 0.2655, lr_0 = 1.6445e-04
Loss = 1.3338e-02, PNorm = 183.7529, GNorm = 0.3020, lr_0 = 1.6434e-04
Loss = 3.5306e-03, PNorm = 183.7584, GNorm = 0.1219, lr_0 = 1.6423e-04
Loss = 5.5145e-03, PNorm = 183.7627, GNorm = 0.1696, lr_0 = 1.6412e-04
Loss = 3.1633e-03, PNorm = 183.7665, GNorm = 0.1934, lr_0 = 1.6400e-04
Loss = 2.4138e-03, PNorm = 183.7697, GNorm = 0.1389, lr_0 = 1.6389e-04
Loss = 2.6604e-03, PNorm = 183.7735, GNorm = 0.0772, lr_0 = 1.6378e-04
Validation mae = 0.120869
Epoch 24
Loss = 2.4250e-03, PNorm = 183.7766, GNorm = 0.1376, lr_0 = 1.6367e-04
Loss = 3.2321e-03, PNorm = 183.7777, GNorm = 0.0824, lr_0 = 1.6355e-04
Loss = 2.0869e-03, PNorm = 183.7794, GNorm = 0.0969, lr_0 = 1.6344e-04
Loss = 3.7940e-03, PNorm = 183.7810, GNorm = 0.1121, lr_0 = 1.6333e-04
Loss = 1.6241e-02, PNorm = 183.7825, GNorm = 0.1526, lr_0 = 1.6322e-04
Loss = 2.6296e-03, PNorm = 183.7844, GNorm = 0.2465, lr_0 = 1.6311e-04
Loss = 3.1496e-03, PNorm = 183.7856, GNorm = 0.1892, lr_0 = 1.6299e-04
Loss = 2.5100e-03, PNorm = 183.7890, GNorm = 0.1507, lr_0 = 1.6288e-04
Loss = 3.5025e-03, PNorm = 183.7908, GNorm = 0.2281, lr_0 = 1.6277e-04
Loss = 7.6493e-03, PNorm = 183.7930, GNorm = 0.1060, lr_0 = 1.6266e-04
Loss = 2.6599e-03, PNorm = 183.7959, GNorm = 0.0643, lr_0 = 1.6255e-04
Loss = 5.8474e-03, PNorm = 183.7988, GNorm = 0.1024, lr_0 = 1.6244e-04
Loss = 2.7430e-03, PNorm = 183.8020, GNorm = 0.0769, lr_0 = 1.6233e-04
Loss = 1.1007e-02, PNorm = 183.8039, GNorm = 0.1420, lr_0 = 1.6221e-04
Loss = 3.5244e-03, PNorm = 183.8083, GNorm = 0.1374, lr_0 = 1.6210e-04
Loss = 3.0693e-03, PNorm = 183.8120, GNorm = 0.1332, lr_0 = 1.6199e-04
Loss = 3.9884e-03, PNorm = 183.8143, GNorm = 0.0839, lr_0 = 1.6188e-04
Loss = 2.9219e-03, PNorm = 183.8165, GNorm = 0.1473, lr_0 = 1.6177e-04
Loss = 2.5393e-03, PNorm = 183.8193, GNorm = 0.1143, lr_0 = 1.6166e-04
Loss = 2.5406e-03, PNorm = 183.8227, GNorm = 0.1730, lr_0 = 1.6155e-04
Loss = 7.5555e-03, PNorm = 183.8257, GNorm = 0.1833, lr_0 = 1.6144e-04
Loss = 2.8800e-03, PNorm = 183.8291, GNorm = 0.0898, lr_0 = 1.6133e-04
Loss = 4.2328e-03, PNorm = 183.8317, GNorm = 0.1565, lr_0 = 1.6122e-04
Loss = 5.4946e-03, PNorm = 183.8347, GNorm = 0.2554, lr_0 = 1.6111e-04
Loss = 4.3007e-03, PNorm = 183.8363, GNorm = 0.0768, lr_0 = 1.6100e-04
Loss = 4.1406e-03, PNorm = 183.8394, GNorm = 0.0615, lr_0 = 1.6089e-04
Loss = 4.0655e-03, PNorm = 183.8427, GNorm = 0.1716, lr_0 = 1.6078e-04
Loss = 4.5763e-03, PNorm = 183.8448, GNorm = 0.1597, lr_0 = 1.6067e-04
Loss = 4.0871e-03, PNorm = 183.8478, GNorm = 0.0432, lr_0 = 1.6056e-04
Loss = 2.7104e-03, PNorm = 183.8506, GNorm = 0.0582, lr_0 = 1.6045e-04
Loss = 6.6120e-03, PNorm = 183.8535, GNorm = 1.3488, lr_0 = 1.6034e-04
Loss = 5.1096e-03, PNorm = 183.8562, GNorm = 0.0804, lr_0 = 1.6023e-04
Loss = 3.3190e-03, PNorm = 183.8597, GNorm = 0.0969, lr_0 = 1.6012e-04
Loss = 3.2108e-03, PNorm = 183.8619, GNorm = 0.1801, lr_0 = 1.6001e-04
Loss = 3.3542e-03, PNorm = 183.8643, GNorm = 0.1246, lr_0 = 1.5990e-04
Loss = 2.6699e-03, PNorm = 183.8666, GNorm = 0.0689, lr_0 = 1.5979e-04
Loss = 3.6058e-03, PNorm = 183.8676, GNorm = 0.1171, lr_0 = 1.5968e-04
Loss = 2.2497e-03, PNorm = 183.8687, GNorm = 0.1729, lr_0 = 1.5957e-04
Loss = 2.5755e-03, PNorm = 183.8725, GNorm = 0.1018, lr_0 = 1.5946e-04
Loss = 7.7599e-03, PNorm = 183.8756, GNorm = 0.1439, lr_0 = 1.5935e-04
Loss = 1.9942e-03, PNorm = 183.8784, GNorm = 0.1002, lr_0 = 1.5924e-04
Loss = 1.9440e-03, PNorm = 183.8807, GNorm = 0.1274, lr_0 = 1.5913e-04
Loss = 2.5447e-03, PNorm = 183.8842, GNorm = 0.0803, lr_0 = 1.5902e-04
Loss = 4.8436e-03, PNorm = 183.8873, GNorm = 0.1195, lr_0 = 1.5891e-04
Loss = 5.2777e-03, PNorm = 183.8886, GNorm = 0.1399, lr_0 = 1.5880e-04
Loss = 2.8567e-03, PNorm = 183.8907, GNorm = 0.0829, lr_0 = 1.5870e-04
Loss = 2.1407e-03, PNorm = 183.8931, GNorm = 0.0904, lr_0 = 1.5859e-04
Loss = 3.4420e-03, PNorm = 183.8959, GNorm = 0.3223, lr_0 = 1.5848e-04
Loss = 5.3714e-03, PNorm = 183.8972, GNorm = 0.1903, lr_0 = 1.5837e-04
Loss = 2.6895e-03, PNorm = 183.8982, GNorm = 0.1668, lr_0 = 1.5826e-04
Loss = 2.8571e-03, PNorm = 183.9011, GNorm = 0.0665, lr_0 = 1.5815e-04
Loss = 4.3360e-03, PNorm = 183.9030, GNorm = 0.1023, lr_0 = 1.5804e-04
Loss = 3.0800e-03, PNorm = 183.9050, GNorm = 0.0613, lr_0 = 1.5794e-04
Loss = 2.0548e-03, PNorm = 183.9081, GNorm = 0.0429, lr_0 = 1.5783e-04
Loss = 3.4360e-03, PNorm = 183.9112, GNorm = 0.1083, lr_0 = 1.5772e-04
Loss = 5.8972e-03, PNorm = 183.9124, GNorm = 0.0822, lr_0 = 1.5761e-04
Loss = 2.3839e-03, PNorm = 183.9148, GNorm = 0.3272, lr_0 = 1.5750e-04
Loss = 2.2410e-03, PNorm = 183.9187, GNorm = 0.2652, lr_0 = 1.5740e-04
Loss = 4.6845e-03, PNorm = 183.9193, GNorm = 0.0822, lr_0 = 1.5729e-04
Loss = 3.2494e-03, PNorm = 183.9211, GNorm = 0.1086, lr_0 = 1.5718e-04
Loss = 5.3739e-03, PNorm = 183.9252, GNorm = 0.0775, lr_0 = 1.5707e-04
Loss = 5.2370e-03, PNorm = 183.9292, GNorm = 0.0847, lr_0 = 1.5697e-04
Loss = 2.7468e-03, PNorm = 183.9324, GNorm = 0.0901, lr_0 = 1.5686e-04
Loss = 4.2868e-03, PNorm = 183.9359, GNorm = 0.2650, lr_0 = 1.5675e-04
Loss = 5.1146e-03, PNorm = 183.9395, GNorm = 0.1137, lr_0 = 1.5664e-04
Loss = 3.8097e-03, PNorm = 183.9425, GNorm = 0.1435, lr_0 = 1.5654e-04
Loss = 4.6466e-03, PNorm = 183.9456, GNorm = 0.0803, lr_0 = 1.5643e-04
Loss = 3.7710e-03, PNorm = 183.9481, GNorm = 0.1811, lr_0 = 1.5632e-04
Loss = 3.8782e-03, PNorm = 183.9513, GNorm = 0.1309, lr_0 = 1.5621e-04
Loss = 2.1536e-03, PNorm = 183.9532, GNorm = 0.0628, lr_0 = 1.5611e-04
Loss = 4.3096e-03, PNorm = 183.9552, GNorm = 0.0534, lr_0 = 1.5600e-04
Loss = 7.2171e-03, PNorm = 183.9565, GNorm = 0.6078, lr_0 = 1.5589e-04
Loss = 2.1852e-03, PNorm = 183.9580, GNorm = 0.0833, lr_0 = 1.5579e-04
Loss = 6.3977e-03, PNorm = 183.9620, GNorm = 0.3161, lr_0 = 1.5568e-04
Loss = 4.3152e-03, PNorm = 183.9649, GNorm = 0.0824, lr_0 = 1.5557e-04
Loss = 3.5961e-03, PNorm = 183.9680, GNorm = 0.0658, lr_0 = 1.5547e-04
Loss = 2.7775e-03, PNorm = 183.9693, GNorm = 0.2539, lr_0 = 1.5536e-04
Loss = 4.8149e-03, PNorm = 183.9712, GNorm = 0.0823, lr_0 = 1.5525e-04
Loss = 3.9727e-03, PNorm = 183.9735, GNorm = 0.1218, lr_0 = 1.5515e-04
Loss = 8.1354e-03, PNorm = 183.9749, GNorm = 0.1018, lr_0 = 1.5504e-04
Loss = 4.0942e-03, PNorm = 183.9782, GNorm = 0.0845, lr_0 = 1.5493e-04
Loss = 4.4180e-03, PNorm = 183.9825, GNorm = 0.1173, lr_0 = 1.5483e-04
Loss = 2.9545e-03, PNorm = 183.9860, GNorm = 0.0855, lr_0 = 1.5472e-04
Loss = 2.7448e-03, PNorm = 183.9893, GNorm = 0.2420, lr_0 = 1.5462e-04
Loss = 3.1011e-03, PNorm = 183.9937, GNorm = 0.0837, lr_0 = 1.5451e-04
Loss = 1.7849e-03, PNorm = 183.9962, GNorm = 0.0637, lr_0 = 1.5440e-04
Loss = 5.9659e-03, PNorm = 183.9981, GNorm = 0.2277, lr_0 = 1.5430e-04
Loss = 3.7250e-03, PNorm = 184.0017, GNorm = 0.1157, lr_0 = 1.5419e-04
Loss = 2.2994e-03, PNorm = 184.0055, GNorm = 0.1120, lr_0 = 1.5409e-04
Loss = 5.6050e-03, PNorm = 184.0089, GNorm = 0.0670, lr_0 = 1.5398e-04
Loss = 3.2691e-03, PNorm = 184.0121, GNorm = 0.0753, lr_0 = 1.5388e-04
Loss = 7.9550e-03, PNorm = 184.0150, GNorm = 0.1302, lr_0 = 1.5377e-04
Loss = 4.2704e-03, PNorm = 184.0194, GNorm = 0.1099, lr_0 = 1.5367e-04
Loss = 4.3224e-03, PNorm = 184.0243, GNorm = 0.1732, lr_0 = 1.5356e-04
Loss = 4.3524e-03, PNorm = 184.0265, GNorm = 0.2333, lr_0 = 1.5346e-04
Loss = 1.8282e-03, PNorm = 184.0280, GNorm = 0.1190, lr_0 = 1.5335e-04
Loss = 2.9657e-03, PNorm = 184.0291, GNorm = 0.1431, lr_0 = 1.5325e-04
Loss = 4.1570e-03, PNorm = 184.0303, GNorm = 0.0657, lr_0 = 1.5314e-04
Loss = 3.7176e-03, PNorm = 184.0327, GNorm = 0.1722, lr_0 = 1.5304e-04
Loss = 2.0936e-03, PNorm = 184.0363, GNorm = 0.1076, lr_0 = 1.5293e-04
Loss = 2.1493e-03, PNorm = 184.0399, GNorm = 0.0498, lr_0 = 1.5283e-04
Loss = 5.6196e-03, PNorm = 184.0415, GNorm = 0.1106, lr_0 = 1.5272e-04
Loss = 2.1416e-03, PNorm = 184.0426, GNorm = 0.0467, lr_0 = 1.5262e-04
Loss = 4.9092e-03, PNorm = 184.0455, GNorm = 0.1843, lr_0 = 1.5251e-04
Loss = 2.2320e-03, PNorm = 184.0481, GNorm = 0.1335, lr_0 = 1.5241e-04
Loss = 7.0154e-03, PNorm = 184.0492, GNorm = 0.2106, lr_0 = 1.5230e-04
Loss = 7.5361e-03, PNorm = 184.0532, GNorm = 0.1127, lr_0 = 1.5220e-04
Loss = 7.3507e-03, PNorm = 184.0581, GNorm = 0.1331, lr_0 = 1.5209e-04
Loss = 3.0199e-03, PNorm = 184.0608, GNorm = 0.1586, lr_0 = 1.5199e-04
Loss = 4.6706e-03, PNorm = 184.0623, GNorm = 0.1078, lr_0 = 1.5189e-04
Loss = 2.4717e-03, PNorm = 184.0652, GNorm = 0.1220, lr_0 = 1.5178e-04
Loss = 8.1607e-03, PNorm = 184.0696, GNorm = 0.1366, lr_0 = 1.5168e-04
Loss = 3.6175e-03, PNorm = 184.0741, GNorm = 0.2321, lr_0 = 1.5157e-04
Loss = 5.0532e-03, PNorm = 184.0762, GNorm = 0.1896, lr_0 = 1.5147e-04
Loss = 4.8074e-03, PNorm = 184.0779, GNorm = 0.0602, lr_0 = 1.5137e-04
Loss = 1.0019e-02, PNorm = 184.0793, GNorm = 0.3005, lr_0 = 1.5126e-04
Loss = 8.7372e-03, PNorm = 184.0824, GNorm = 0.1297, lr_0 = 1.5116e-04
Loss = 8.2565e-03, PNorm = 184.0856, GNorm = 0.1673, lr_0 = 1.5106e-04
Loss = 3.9704e-03, PNorm = 184.0896, GNorm = 0.1427, lr_0 = 1.5095e-04
Loss = 2.7934e-03, PNorm = 184.0938, GNorm = 0.1560, lr_0 = 1.5085e-04
Validation mae = 0.121061
Epoch 25
Loss = 3.6763e-03, PNorm = 184.0964, GNorm = 0.1016, lr_0 = 1.5075e-04
Loss = 2.4140e-03, PNorm = 184.0996, GNorm = 0.1621, lr_0 = 1.5064e-04
Loss = 2.5225e-03, PNorm = 184.1014, GNorm = 0.2292, lr_0 = 1.5054e-04
Loss = 2.2294e-03, PNorm = 184.1023, GNorm = 0.1195, lr_0 = 1.5044e-04
Loss = 2.3486e-03, PNorm = 184.1041, GNorm = 0.1987, lr_0 = 1.5033e-04
Loss = 4.8193e-03, PNorm = 184.1051, GNorm = 0.2290, lr_0 = 1.5023e-04
Loss = 5.0429e-03, PNorm = 184.1049, GNorm = 0.0601, lr_0 = 1.5013e-04
Loss = 2.8473e-03, PNorm = 184.1062, GNorm = 0.0676, lr_0 = 1.5002e-04
Loss = 2.9517e-03, PNorm = 184.1075, GNorm = 0.0606, lr_0 = 1.4992e-04
Loss = 2.6262e-03, PNorm = 184.1088, GNorm = 0.1030, lr_0 = 1.4982e-04
Loss = 3.0069e-03, PNorm = 184.1119, GNorm = 0.0913, lr_0 = 1.4972e-04
Loss = 2.0712e-03, PNorm = 184.1155, GNorm = 0.0470, lr_0 = 1.4961e-04
Loss = 2.7507e-03, PNorm = 184.1192, GNorm = 0.1327, lr_0 = 1.4951e-04
Loss = 1.9144e-03, PNorm = 184.1223, GNorm = 0.1879, lr_0 = 1.4941e-04
Loss = 5.2472e-03, PNorm = 184.1231, GNorm = 0.1135, lr_0 = 1.4931e-04
Loss = 3.9656e-03, PNorm = 184.1260, GNorm = 0.0686, lr_0 = 1.4920e-04
Loss = 5.9373e-03, PNorm = 184.1289, GNorm = 0.0928, lr_0 = 1.4910e-04
Loss = 3.6340e-03, PNorm = 184.1305, GNorm = 0.1852, lr_0 = 1.4900e-04
Loss = 3.9920e-03, PNorm = 184.1330, GNorm = 0.1564, lr_0 = 1.4890e-04
Loss = 2.6645e-03, PNorm = 184.1362, GNorm = 0.0932, lr_0 = 1.4880e-04
Loss = 4.2397e-03, PNorm = 184.1387, GNorm = 0.0700, lr_0 = 1.4869e-04
Loss = 2.1159e-03, PNorm = 184.1410, GNorm = 0.1354, lr_0 = 1.4859e-04
Loss = 5.5637e-03, PNorm = 184.1428, GNorm = 0.4075, lr_0 = 1.4849e-04
Loss = 1.6184e-03, PNorm = 184.1436, GNorm = 0.1140, lr_0 = 1.4839e-04
Loss = 3.5077e-03, PNorm = 184.1451, GNorm = 0.1138, lr_0 = 1.4829e-04
Loss = 2.5468e-03, PNorm = 184.1474, GNorm = 0.2183, lr_0 = 1.4818e-04
Loss = 3.7415e-03, PNorm = 184.1493, GNorm = 0.0704, lr_0 = 1.4808e-04
Loss = 2.2011e-03, PNorm = 184.1528, GNorm = 0.1134, lr_0 = 1.4798e-04
Loss = 3.7597e-03, PNorm = 184.1554, GNorm = 0.0714, lr_0 = 1.4788e-04
Loss = 5.4369e-03, PNorm = 184.1578, GNorm = 0.0809, lr_0 = 1.4778e-04
Loss = 5.9821e-03, PNorm = 184.1619, GNorm = 0.0632, lr_0 = 1.4768e-04
Loss = 2.6299e-03, PNorm = 184.1644, GNorm = 0.0864, lr_0 = 1.4758e-04
Loss = 3.2718e-03, PNorm = 184.1656, GNorm = 0.1253, lr_0 = 1.4748e-04
Loss = 3.1748e-03, PNorm = 184.1681, GNorm = 0.0530, lr_0 = 1.4737e-04
Loss = 3.8594e-03, PNorm = 184.1700, GNorm = 0.1126, lr_0 = 1.4727e-04
Loss = 5.7495e-03, PNorm = 184.1717, GNorm = 0.0977, lr_0 = 1.4717e-04
Loss = 5.3603e-03, PNorm = 184.1743, GNorm = 0.1104, lr_0 = 1.4707e-04
Loss = 1.9177e-03, PNorm = 184.1774, GNorm = 0.1236, lr_0 = 1.4697e-04
Loss = 5.3773e-03, PNorm = 184.1790, GNorm = 0.0611, lr_0 = 1.4687e-04
Loss = 2.7326e-03, PNorm = 184.1804, GNorm = 0.1382, lr_0 = 1.4677e-04
Loss = 1.9245e-03, PNorm = 184.1815, GNorm = 0.1306, lr_0 = 1.4667e-04
Loss = 3.3763e-03, PNorm = 184.1837, GNorm = 0.9524, lr_0 = 1.4657e-04
Loss = 3.3995e-03, PNorm = 184.1876, GNorm = 0.0461, lr_0 = 1.4647e-04
Loss = 3.1251e-03, PNorm = 184.1924, GNorm = 0.0959, lr_0 = 1.4637e-04
Loss = 5.2190e-03, PNorm = 184.1945, GNorm = 0.1306, lr_0 = 1.4627e-04
Loss = 4.3720e-03, PNorm = 184.1970, GNorm = 0.1059, lr_0 = 1.4617e-04
Loss = 5.1075e-03, PNorm = 184.1995, GNorm = 0.0989, lr_0 = 1.4607e-04
Loss = 4.1150e-03, PNorm = 184.2015, GNorm = 0.1263, lr_0 = 1.4597e-04
Loss = 4.7464e-03, PNorm = 184.2034, GNorm = 0.1611, lr_0 = 1.4587e-04
Loss = 1.7898e-03, PNorm = 184.2058, GNorm = 0.0851, lr_0 = 1.4577e-04
Loss = 2.4588e-02, PNorm = 184.2109, GNorm = 0.1457, lr_0 = 1.4567e-04
Loss = 3.4362e-03, PNorm = 184.2113, GNorm = 0.1154, lr_0 = 1.4557e-04
Loss = 4.6480e-03, PNorm = 184.2119, GNorm = 0.0713, lr_0 = 1.4547e-04
Loss = 1.8520e-03, PNorm = 184.2141, GNorm = 0.1454, lr_0 = 1.4537e-04
Loss = 3.4272e-03, PNorm = 184.2154, GNorm = 0.1204, lr_0 = 1.4527e-04
Loss = 5.2202e-03, PNorm = 184.2170, GNorm = 0.2283, lr_0 = 1.4517e-04
Loss = 8.4785e-03, PNorm = 184.2187, GNorm = 0.1720, lr_0 = 1.4507e-04
Loss = 3.4653e-03, PNorm = 184.2210, GNorm = 0.2915, lr_0 = 1.4497e-04
Loss = 3.4709e-03, PNorm = 184.2228, GNorm = 0.2409, lr_0 = 1.4487e-04
Loss = 2.9954e-03, PNorm = 184.2257, GNorm = 0.0995, lr_0 = 1.4477e-04
Loss = 3.0304e-03, PNorm = 184.2282, GNorm = 0.2122, lr_0 = 1.4467e-04
Loss = 6.4668e-03, PNorm = 184.2310, GNorm = 0.1577, lr_0 = 1.4457e-04
Loss = 1.9872e-03, PNorm = 184.2337, GNorm = 0.1824, lr_0 = 1.4447e-04
Loss = 4.1083e-03, PNorm = 184.2359, GNorm = 0.1135, lr_0 = 1.4438e-04
Loss = 8.3099e-03, PNorm = 184.2382, GNorm = 0.0891, lr_0 = 1.4428e-04
Loss = 4.3350e-03, PNorm = 184.2401, GNorm = 0.1376, lr_0 = 1.4418e-04
Loss = 2.4460e-03, PNorm = 184.2430, GNorm = 0.0899, lr_0 = 1.4408e-04
Loss = 7.1682e-03, PNorm = 184.2461, GNorm = 0.0724, lr_0 = 1.4398e-04
Loss = 4.0290e-03, PNorm = 184.2478, GNorm = 0.0788, lr_0 = 1.4388e-04
Loss = 3.1055e-03, PNorm = 184.2491, GNorm = 0.0561, lr_0 = 1.4378e-04
Loss = 2.8057e-03, PNorm = 184.2509, GNorm = 0.0944, lr_0 = 1.4368e-04
Loss = 3.3718e-03, PNorm = 184.2524, GNorm = 0.1749, lr_0 = 1.4359e-04
Loss = 1.7696e-03, PNorm = 184.2543, GNorm = 0.0714, lr_0 = 1.4349e-04
Loss = 2.7108e-03, PNorm = 184.2562, GNorm = 0.1439, lr_0 = 1.4339e-04
Loss = 9.8165e-03, PNorm = 184.2567, GNorm = 0.1569, lr_0 = 1.4329e-04
Loss = 5.7383e-03, PNorm = 184.2573, GNorm = 1.0045, lr_0 = 1.4319e-04
Loss = 3.0695e-03, PNorm = 184.2593, GNorm = 0.1259, lr_0 = 1.4310e-04
Loss = 4.4486e-03, PNorm = 184.2610, GNorm = 0.1583, lr_0 = 1.4300e-04
Loss = 2.7488e-03, PNorm = 184.2642, GNorm = 0.1362, lr_0 = 1.4290e-04
Loss = 5.5642e-03, PNorm = 184.2665, GNorm = 0.3396, lr_0 = 1.4280e-04
Loss = 1.9418e-03, PNorm = 184.2676, GNorm = 0.1970, lr_0 = 1.4270e-04
Loss = 2.3635e-03, PNorm = 184.2687, GNorm = 0.1044, lr_0 = 1.4261e-04
Loss = 2.3519e-03, PNorm = 184.2707, GNorm = 0.1501, lr_0 = 1.4251e-04
Loss = 2.6238e-03, PNorm = 184.2729, GNorm = 0.1148, lr_0 = 1.4241e-04
Loss = 1.7977e-03, PNorm = 184.2754, GNorm = 0.1991, lr_0 = 1.4231e-04
Loss = 1.5018e-03, PNorm = 184.2788, GNorm = 0.0909, lr_0 = 1.4222e-04
Loss = 4.6872e-03, PNorm = 184.2820, GNorm = 0.0904, lr_0 = 1.4212e-04
Loss = 1.5462e-03, PNorm = 184.2839, GNorm = 0.1018, lr_0 = 1.4202e-04
Loss = 3.3527e-03, PNorm = 184.2860, GNorm = 0.0541, lr_0 = 1.4192e-04
Loss = 1.7860e-03, PNorm = 184.2883, GNorm = 0.1509, lr_0 = 1.4183e-04
Loss = 5.9072e-03, PNorm = 184.2899, GNorm = 1.7496, lr_0 = 1.4173e-04
Loss = 8.4183e-03, PNorm = 184.2907, GNorm = 0.2120, lr_0 = 1.4163e-04
Loss = 6.4117e-03, PNorm = 184.2944, GNorm = 0.0662, lr_0 = 1.4153e-04
Loss = 2.7621e-03, PNorm = 184.2970, GNorm = 0.1240, lr_0 = 1.4144e-04
Loss = 3.8744e-03, PNorm = 184.3004, GNorm = 0.3286, lr_0 = 1.4134e-04
Loss = 2.4847e-03, PNorm = 184.3037, GNorm = 0.0745, lr_0 = 1.4124e-04
Loss = 4.2091e-03, PNorm = 184.3064, GNorm = 0.0428, lr_0 = 1.4115e-04
Loss = 1.9735e-03, PNorm = 184.3092, GNorm = 0.0652, lr_0 = 1.4105e-04
Loss = 2.8918e-03, PNorm = 184.3126, GNorm = 0.2760, lr_0 = 1.4095e-04
Loss = 6.9108e-03, PNorm = 184.3164, GNorm = 0.2318, lr_0 = 1.4086e-04
Loss = 5.6087e-03, PNorm = 184.3204, GNorm = 0.0947, lr_0 = 1.4076e-04
Loss = 4.2037e-03, PNorm = 184.3237, GNorm = 0.4543, lr_0 = 1.4066e-04
Loss = 2.7658e-03, PNorm = 184.3265, GNorm = 0.0908, lr_0 = 1.4057e-04
Loss = 4.0128e-03, PNorm = 184.3295, GNorm = 0.1881, lr_0 = 1.4047e-04
Loss = 7.1703e-03, PNorm = 184.3332, GNorm = 0.1257, lr_0 = 1.4038e-04
Loss = 1.7899e-03, PNorm = 184.3352, GNorm = 0.1879, lr_0 = 1.4028e-04
Loss = 3.2543e-03, PNorm = 184.3384, GNorm = 0.0740, lr_0 = 1.4018e-04
Loss = 2.0741e-03, PNorm = 184.3410, GNorm = 0.1467, lr_0 = 1.4009e-04
Loss = 1.9192e-03, PNorm = 184.3429, GNorm = 0.0556, lr_0 = 1.3999e-04
Loss = 2.9314e-03, PNorm = 184.3446, GNorm = 0.0628, lr_0 = 1.3990e-04
Loss = 2.3703e-03, PNorm = 184.3458, GNorm = 0.1306, lr_0 = 1.3980e-04
Loss = 2.1657e-03, PNorm = 184.3468, GNorm = 0.1151, lr_0 = 1.3970e-04
Loss = 6.3609e-03, PNorm = 184.3481, GNorm = 0.1293, lr_0 = 1.3961e-04
Loss = 4.5022e-03, PNorm = 184.3507, GNorm = 0.0805, lr_0 = 1.3951e-04
Loss = 2.4626e-03, PNorm = 184.3530, GNorm = 0.2259, lr_0 = 1.3942e-04
Loss = 3.1836e-03, PNorm = 184.3557, GNorm = 0.4017, lr_0 = 1.3932e-04
Loss = 4.1509e-03, PNorm = 184.3583, GNorm = 0.1010, lr_0 = 1.3923e-04
Loss = 7.3801e-03, PNorm = 184.3599, GNorm = 0.1171, lr_0 = 1.3913e-04
Loss = 5.1799e-03, PNorm = 184.3611, GNorm = 0.1075, lr_0 = 1.3904e-04
Loss = 2.2451e-03, PNorm = 184.3637, GNorm = 0.1198, lr_0 = 1.3894e-04
Validation mae = 0.120920
Epoch 26
Loss = 1.1124e-02, PNorm = 184.3664, GNorm = 0.3142, lr_0 = 1.3884e-04
Loss = 2.1964e-03, PNorm = 184.3674, GNorm = 0.0829, lr_0 = 1.3875e-04
Loss = 1.5333e-03, PNorm = 184.3686, GNorm = 0.0336, lr_0 = 1.3865e-04
Loss = 1.5319e-03, PNorm = 184.3702, GNorm = 0.0563, lr_0 = 1.3856e-04
Loss = 4.1963e-03, PNorm = 184.3721, GNorm = 0.0720, lr_0 = 1.3846e-04
Loss = 2.4751e-03, PNorm = 184.3725, GNorm = 0.1304, lr_0 = 1.3837e-04
Loss = 2.7362e-03, PNorm = 184.3745, GNorm = 0.1203, lr_0 = 1.3828e-04
Loss = 1.5984e-03, PNorm = 184.3765, GNorm = 0.0935, lr_0 = 1.3818e-04
Loss = 3.5634e-03, PNorm = 184.3788, GNorm = 0.1225, lr_0 = 1.3809e-04
Loss = 2.2678e-03, PNorm = 184.3788, GNorm = 0.1175, lr_0 = 1.3799e-04
Loss = 2.1044e-03, PNorm = 184.3803, GNorm = 0.1099, lr_0 = 1.3790e-04
Loss = 1.6223e-03, PNorm = 184.3818, GNorm = 0.0776, lr_0 = 1.3780e-04
Loss = 2.8405e-03, PNorm = 184.3831, GNorm = 0.0572, lr_0 = 1.3771e-04
Loss = 4.8677e-03, PNorm = 184.3841, GNorm = 0.1887, lr_0 = 1.3761e-04
Loss = 5.7072e-03, PNorm = 184.3867, GNorm = 0.0749, lr_0 = 1.3752e-04
Loss = 3.8177e-03, PNorm = 184.3877, GNorm = 0.0892, lr_0 = 1.3742e-04
Loss = 4.3802e-03, PNorm = 184.3892, GNorm = 0.0980, lr_0 = 1.3733e-04
Loss = 1.8032e-03, PNorm = 184.3919, GNorm = 0.0742, lr_0 = 1.3724e-04
Loss = 4.1110e-03, PNorm = 184.3944, GNorm = 0.1481, lr_0 = 1.3714e-04
Loss = 1.8059e-03, PNorm = 184.3970, GNorm = 0.0791, lr_0 = 1.3705e-04
Loss = 2.6271e-03, PNorm = 184.3990, GNorm = 0.1800, lr_0 = 1.3695e-04
Loss = 5.4102e-03, PNorm = 184.4011, GNorm = 0.1212, lr_0 = 1.3686e-04
Loss = 3.1663e-03, PNorm = 184.4032, GNorm = 0.0786, lr_0 = 1.3677e-04
Loss = 6.6862e-03, PNorm = 184.4052, GNorm = 0.1088, lr_0 = 1.3667e-04
Loss = 1.7145e-03, PNorm = 184.4077, GNorm = 0.1170, lr_0 = 1.3658e-04
Loss = 4.0486e-03, PNorm = 184.4100, GNorm = 0.0973, lr_0 = 1.3649e-04
Loss = 2.0680e-03, PNorm = 184.4100, GNorm = 0.0729, lr_0 = 1.3639e-04
Loss = 3.5726e-03, PNorm = 184.4113, GNorm = 0.1168, lr_0 = 1.3630e-04
Loss = 1.7551e-03, PNorm = 184.4140, GNorm = 0.1626, lr_0 = 1.3621e-04
Loss = 4.6689e-03, PNorm = 184.4149, GNorm = 0.0735, lr_0 = 1.3611e-04
Loss = 1.3901e-03, PNorm = 184.4153, GNorm = 0.0853, lr_0 = 1.3602e-04
Loss = 3.7376e-03, PNorm = 184.4184, GNorm = 0.1102, lr_0 = 1.3593e-04
Loss = 1.6505e-03, PNorm = 184.4194, GNorm = 0.0702, lr_0 = 1.3583e-04
Loss = 2.6718e-03, PNorm = 184.4213, GNorm = 0.0999, lr_0 = 1.3574e-04
Loss = 4.0184e-03, PNorm = 184.4233, GNorm = 0.1273, lr_0 = 1.3565e-04
Loss = 2.3332e-03, PNorm = 184.4259, GNorm = 0.0669, lr_0 = 1.3555e-04
Loss = 1.9699e-03, PNorm = 184.4278, GNorm = 0.0717, lr_0 = 1.3546e-04
Loss = 3.1718e-03, PNorm = 184.4296, GNorm = 0.1681, lr_0 = 1.3537e-04
Loss = 2.7338e-03, PNorm = 184.4313, GNorm = 0.1685, lr_0 = 1.3528e-04
Loss = 2.3790e-03, PNorm = 184.4332, GNorm = 0.2941, lr_0 = 1.3518e-04
Loss = 3.7768e-03, PNorm = 184.4350, GNorm = 0.0658, lr_0 = 1.3509e-04
Loss = 3.6184e-03, PNorm = 184.4376, GNorm = 0.0753, lr_0 = 1.3500e-04
Loss = 6.2769e-03, PNorm = 184.4390, GNorm = 0.0856, lr_0 = 1.3491e-04
Loss = 3.0608e-03, PNorm = 184.4414, GNorm = 0.0824, lr_0 = 1.3481e-04
Loss = 5.3470e-03, PNorm = 184.4440, GNorm = 0.0612, lr_0 = 1.3472e-04
Loss = 4.1736e-03, PNorm = 184.4454, GNorm = 0.1281, lr_0 = 1.3463e-04
Loss = 2.6734e-03, PNorm = 184.4479, GNorm = 0.0885, lr_0 = 1.3454e-04
Loss = 4.6715e-03, PNorm = 184.4506, GNorm = 0.0737, lr_0 = 1.3444e-04
Loss = 2.6247e-03, PNorm = 184.4527, GNorm = 0.2225, lr_0 = 1.3435e-04
Loss = 3.8810e-03, PNorm = 184.4551, GNorm = 0.0745, lr_0 = 1.3426e-04
Loss = 2.9351e-03, PNorm = 184.4564, GNorm = 0.1029, lr_0 = 1.3417e-04
Loss = 2.3494e-03, PNorm = 184.4585, GNorm = 0.1365, lr_0 = 1.3408e-04
Loss = 1.4633e-03, PNorm = 184.4600, GNorm = 0.1578, lr_0 = 1.3398e-04
Loss = 1.3771e-03, PNorm = 184.4618, GNorm = 0.1135, lr_0 = 1.3389e-04
Loss = 2.0359e-03, PNorm = 184.4636, GNorm = 0.0635, lr_0 = 1.3380e-04
Loss = 4.1019e-03, PNorm = 184.4644, GNorm = 0.1093, lr_0 = 1.3371e-04
Loss = 2.4981e-03, PNorm = 184.4662, GNorm = 0.1317, lr_0 = 1.3362e-04
Loss = 3.0052e-03, PNorm = 184.4684, GNorm = 0.0542, lr_0 = 1.3353e-04
Loss = 3.3566e-03, PNorm = 184.4705, GNorm = 0.1046, lr_0 = 1.3343e-04
Loss = 1.9638e-03, PNorm = 184.4723, GNorm = 0.1568, lr_0 = 1.3334e-04
Loss = 2.3884e-03, PNorm = 184.4742, GNorm = 0.0623, lr_0 = 1.3325e-04
Loss = 1.9260e-03, PNorm = 184.4756, GNorm = 0.0977, lr_0 = 1.3316e-04
Loss = 4.4765e-03, PNorm = 184.4771, GNorm = 0.4323, lr_0 = 1.3307e-04
Loss = 5.6621e-03, PNorm = 184.4767, GNorm = 0.1508, lr_0 = 1.3298e-04
Loss = 3.0448e-03, PNorm = 184.4773, GNorm = 0.0941, lr_0 = 1.3289e-04
Loss = 2.0961e-03, PNorm = 184.4797, GNorm = 0.1020, lr_0 = 1.3280e-04
Loss = 1.9983e-03, PNorm = 184.4808, GNorm = 0.0885, lr_0 = 1.3270e-04
Loss = 2.1794e-03, PNorm = 184.4822, GNorm = 0.1515, lr_0 = 1.3261e-04
Loss = 6.9092e-03, PNorm = 184.4845, GNorm = 0.0713, lr_0 = 1.3252e-04
Loss = 3.7577e-03, PNorm = 184.4858, GNorm = 0.1948, lr_0 = 1.3243e-04
Loss = 2.2034e-03, PNorm = 184.4887, GNorm = 0.0799, lr_0 = 1.3234e-04
Loss = 2.8895e-03, PNorm = 184.4892, GNorm = 0.1630, lr_0 = 1.3225e-04
Loss = 3.2372e-03, PNorm = 184.4916, GNorm = 0.0643, lr_0 = 1.3216e-04
Loss = 3.0432e-03, PNorm = 184.4951, GNorm = 0.1045, lr_0 = 1.3207e-04
Loss = 1.7270e-03, PNorm = 184.4979, GNorm = 0.0966, lr_0 = 1.3198e-04
Loss = 2.9142e-03, PNorm = 184.4998, GNorm = 0.1234, lr_0 = 1.3189e-04
Loss = 4.0400e-03, PNorm = 184.5021, GNorm = 0.2457, lr_0 = 1.3180e-04
Loss = 1.9784e-03, PNorm = 184.5040, GNorm = 0.3069, lr_0 = 1.3171e-04
Loss = 1.9986e-02, PNorm = 184.5072, GNorm = 2.9764, lr_0 = 1.3162e-04
Loss = 5.5096e-03, PNorm = 184.5091, GNorm = 0.1379, lr_0 = 1.3153e-04
Loss = 2.6060e-03, PNorm = 184.5112, GNorm = 0.1984, lr_0 = 1.3144e-04
Loss = 4.6645e-03, PNorm = 184.5148, GNorm = 0.0584, lr_0 = 1.3135e-04
Loss = 1.4535e-03, PNorm = 184.5175, GNorm = 0.0464, lr_0 = 1.3126e-04
Loss = 2.6556e-03, PNorm = 184.5190, GNorm = 0.2107, lr_0 = 1.3117e-04
Loss = 4.4966e-03, PNorm = 184.5216, GNorm = 0.1657, lr_0 = 1.3108e-04
Loss = 3.2193e-03, PNorm = 184.5243, GNorm = 0.0790, lr_0 = 1.3099e-04
Loss = 2.0022e-03, PNorm = 184.5272, GNorm = 0.1301, lr_0 = 1.3090e-04
Loss = 9.3202e-03, PNorm = 184.5298, GNorm = 0.2990, lr_0 = 1.3081e-04
Loss = 5.8287e-03, PNorm = 184.5325, GNorm = 0.4223, lr_0 = 1.3072e-04
Loss = 1.3184e-03, PNorm = 184.5344, GNorm = 0.0691, lr_0 = 1.3063e-04
Loss = 4.5478e-03, PNorm = 184.5367, GNorm = 0.0719, lr_0 = 1.3054e-04
Loss = 4.1307e-03, PNorm = 184.5379, GNorm = 0.0475, lr_0 = 1.3045e-04
Loss = 2.2400e-03, PNorm = 184.5401, GNorm = 0.1291, lr_0 = 1.3036e-04
Loss = 4.5455e-03, PNorm = 184.5406, GNorm = 0.0542, lr_0 = 1.3027e-04
Loss = 2.7091e-03, PNorm = 184.5424, GNorm = 0.1185, lr_0 = 1.3018e-04
Loss = 6.7246e-03, PNorm = 184.5449, GNorm = 0.0752, lr_0 = 1.3009e-04
Loss = 5.3338e-03, PNorm = 184.5472, GNorm = 0.0511, lr_0 = 1.3000e-04
Loss = 4.1399e-03, PNorm = 184.5508, GNorm = 0.2279, lr_0 = 1.2992e-04
Loss = 5.1866e-03, PNorm = 184.5529, GNorm = 0.1013, lr_0 = 1.2983e-04
Loss = 5.6111e-03, PNorm = 184.5558, GNorm = 0.0962, lr_0 = 1.2974e-04
Loss = 6.4872e-03, PNorm = 184.5579, GNorm = 0.0444, lr_0 = 1.2965e-04
Loss = 2.9633e-03, PNorm = 184.5601, GNorm = 0.3234, lr_0 = 1.2956e-04
Loss = 1.6243e-03, PNorm = 184.5637, GNorm = 0.1539, lr_0 = 1.2947e-04
Loss = 4.2346e-03, PNorm = 184.5663, GNorm = 0.1097, lr_0 = 1.2938e-04
Loss = 7.7026e-03, PNorm = 184.5683, GNorm = 0.0829, lr_0 = 1.2929e-04
Loss = 3.4847e-03, PNorm = 184.5711, GNorm = 0.0687, lr_0 = 1.2921e-04
Loss = 2.5720e-03, PNorm = 184.5725, GNorm = 0.0668, lr_0 = 1.2912e-04
Loss = 2.3622e-03, PNorm = 184.5743, GNorm = 0.0693, lr_0 = 1.2903e-04
Loss = 4.2738e-03, PNorm = 184.5748, GNorm = 0.0836, lr_0 = 1.2894e-04
Loss = 3.4570e-03, PNorm = 184.5771, GNorm = 0.1467, lr_0 = 1.2885e-04
Loss = 2.2644e-03, PNorm = 184.5791, GNorm = 0.0762, lr_0 = 1.2876e-04
Loss = 2.7049e-03, PNorm = 184.5807, GNorm = 0.1043, lr_0 = 1.2867e-04
Loss = 1.3741e-03, PNorm = 184.5822, GNorm = 0.0560, lr_0 = 1.2859e-04
Loss = 2.9798e-03, PNorm = 184.5836, GNorm = 0.0508, lr_0 = 1.2850e-04
Loss = 2.1021e-03, PNorm = 184.5859, GNorm = 0.1008, lr_0 = 1.2841e-04
Loss = 3.5964e-03, PNorm = 184.5864, GNorm = 0.0513, lr_0 = 1.2832e-04
Loss = 1.7168e-03, PNorm = 184.5859, GNorm = 0.1967, lr_0 = 1.2823e-04
Loss = 4.9518e-03, PNorm = 184.5885, GNorm = 0.1296, lr_0 = 1.2815e-04
Loss = 4.8241e-03, PNorm = 184.5914, GNorm = 0.0838, lr_0 = 1.2806e-04
Loss = 5.3323e-03, PNorm = 184.5934, GNorm = 0.0505, lr_0 = 1.2797e-04
Validation mae = 0.120821
Epoch 27
Loss = 2.2313e-03, PNorm = 184.5957, GNorm = 0.0669, lr_0 = 1.2788e-04
Loss = 1.2854e-03, PNorm = 184.5978, GNorm = 0.1016, lr_0 = 1.2780e-04
Loss = 1.8149e-03, PNorm = 184.5990, GNorm = 0.0706, lr_0 = 1.2771e-04
Loss = 1.5991e-03, PNorm = 184.6005, GNorm = 0.1207, lr_0 = 1.2762e-04
Loss = 3.7816e-03, PNorm = 184.6014, GNorm = 0.1530, lr_0 = 1.2753e-04
Loss = 1.6018e-03, PNorm = 184.6035, GNorm = 0.0932, lr_0 = 1.2745e-04
Loss = 2.7423e-03, PNorm = 184.6057, GNorm = 0.1117, lr_0 = 1.2736e-04
Loss = 1.8502e-03, PNorm = 184.6075, GNorm = 0.1476, lr_0 = 1.2727e-04
Loss = 3.0237e-03, PNorm = 184.6095, GNorm = 0.1348, lr_0 = 1.2718e-04
Loss = 5.8004e-03, PNorm = 184.6107, GNorm = 0.0614, lr_0 = 1.2710e-04
Loss = 1.7808e-03, PNorm = 184.6127, GNorm = 0.1110, lr_0 = 1.2701e-04
Loss = 1.3304e-03, PNorm = 184.6133, GNorm = 0.0814, lr_0 = 1.2692e-04
Loss = 3.0057e-03, PNorm = 184.6141, GNorm = 0.0761, lr_0 = 1.2684e-04
Loss = 1.3212e-03, PNorm = 184.6147, GNorm = 0.1090, lr_0 = 1.2675e-04
Loss = 6.7668e-03, PNorm = 184.6175, GNorm = 0.2319, lr_0 = 1.2666e-04
Loss = 2.4993e-03, PNorm = 184.6188, GNorm = 0.0647, lr_0 = 1.2658e-04
Loss = 3.6641e-03, PNorm = 184.6198, GNorm = 0.1464, lr_0 = 1.2649e-04
Loss = 3.3168e-03, PNorm = 184.6212, GNorm = 0.0993, lr_0 = 1.2640e-04
Loss = 5.0378e-03, PNorm = 184.6214, GNorm = 0.0995, lr_0 = 1.2632e-04
Loss = 3.1707e-03, PNorm = 184.6223, GNorm = 0.1487, lr_0 = 1.2623e-04
Loss = 5.9993e-03, PNorm = 184.6241, GNorm = 0.0427, lr_0 = 1.2614e-04
Loss = 4.8035e-03, PNorm = 184.6255, GNorm = 0.0963, lr_0 = 1.2606e-04
Loss = 1.3153e-03, PNorm = 184.6266, GNorm = 0.1094, lr_0 = 1.2597e-04
Loss = 1.9416e-03, PNorm = 184.6272, GNorm = 0.0911, lr_0 = 1.2588e-04
Loss = 5.2223e-03, PNorm = 184.6291, GNorm = 0.6667, lr_0 = 1.2580e-04
Loss = 1.9720e-03, PNorm = 184.6317, GNorm = 0.0788, lr_0 = 1.2571e-04
Loss = 2.6637e-03, PNorm = 184.6325, GNorm = 0.2602, lr_0 = 1.2563e-04
Loss = 9.4015e-03, PNorm = 184.6339, GNorm = 0.6439, lr_0 = 1.2554e-04
Loss = 1.9246e-03, PNorm = 184.6340, GNorm = 0.0560, lr_0 = 1.2545e-04
Loss = 1.3786e-03, PNorm = 184.6341, GNorm = 0.0654, lr_0 = 1.2537e-04
Loss = 1.7245e-03, PNorm = 184.6363, GNorm = 0.1073, lr_0 = 1.2528e-04
Loss = 3.7176e-03, PNorm = 184.6385, GNorm = 0.0810, lr_0 = 1.2520e-04
Loss = 1.1588e-03, PNorm = 184.6395, GNorm = 0.0734, lr_0 = 1.2511e-04
Loss = 1.3036e-03, PNorm = 184.6413, GNorm = 0.0510, lr_0 = 1.2502e-04
Loss = 1.6557e-03, PNorm = 184.6433, GNorm = 0.1218, lr_0 = 1.2494e-04
Loss = 1.8662e-03, PNorm = 184.6449, GNorm = 0.1177, lr_0 = 1.2485e-04
Loss = 3.3663e-03, PNorm = 184.6469, GNorm = 0.0876, lr_0 = 1.2477e-04
Loss = 1.7152e-03, PNorm = 184.6481, GNorm = 0.2079, lr_0 = 1.2468e-04
Loss = 2.2844e-03, PNorm = 184.6481, GNorm = 0.1331, lr_0 = 1.2460e-04
Loss = 1.9702e-03, PNorm = 184.6495, GNorm = 0.0760, lr_0 = 1.2451e-04
Loss = 5.4721e-03, PNorm = 184.6517, GNorm = 0.1608, lr_0 = 1.2443e-04
Loss = 3.5653e-03, PNorm = 184.6518, GNorm = 0.0787, lr_0 = 1.2434e-04
Loss = 1.4152e-03, PNorm = 184.6522, GNorm = 0.0927, lr_0 = 1.2426e-04
Loss = 1.6910e-03, PNorm = 184.6536, GNorm = 0.0468, lr_0 = 1.2417e-04
Loss = 1.0348e-03, PNorm = 184.6555, GNorm = 0.1354, lr_0 = 1.2409e-04
Loss = 1.5763e-03, PNorm = 184.6579, GNorm = 0.0827, lr_0 = 1.2400e-04
Loss = 1.6112e-03, PNorm = 184.6601, GNorm = 0.0926, lr_0 = 1.2392e-04
Loss = 3.3005e-03, PNorm = 184.6624, GNorm = 0.0673, lr_0 = 1.2383e-04
Loss = 2.1269e-03, PNorm = 184.6643, GNorm = 0.0833, lr_0 = 1.2375e-04
Loss = 2.5964e-03, PNorm = 184.6666, GNorm = 0.1263, lr_0 = 1.2366e-04
Loss = 3.8252e-03, PNorm = 184.6700, GNorm = 0.0987, lr_0 = 1.2358e-04
Loss = 1.9585e-03, PNorm = 184.6715, GNorm = 0.1010, lr_0 = 1.2349e-04
Loss = 2.0432e-03, PNorm = 184.6727, GNorm = 0.0952, lr_0 = 1.2341e-04
Loss = 1.8775e-03, PNorm = 184.6743, GNorm = 0.0958, lr_0 = 1.2332e-04
Loss = 1.4509e-03, PNorm = 184.6756, GNorm = 0.1340, lr_0 = 1.2324e-04
Loss = 1.7138e-03, PNorm = 184.6784, GNorm = 0.0639, lr_0 = 1.2315e-04
Loss = 3.3802e-03, PNorm = 184.6806, GNorm = 0.0886, lr_0 = 1.2307e-04
Loss = 7.6246e-03, PNorm = 184.6825, GNorm = 0.2291, lr_0 = 1.2298e-04
Loss = 2.0104e-03, PNorm = 184.6832, GNorm = 0.0984, lr_0 = 1.2290e-04
Loss = 4.6332e-03, PNorm = 184.6842, GNorm = 0.1246, lr_0 = 1.2282e-04
Loss = 2.3176e-03, PNorm = 184.6847, GNorm = 0.0961, lr_0 = 1.2273e-04
Loss = 3.5525e-03, PNorm = 184.6864, GNorm = 0.0929, lr_0 = 1.2265e-04
Loss = 3.6732e-03, PNorm = 184.6887, GNorm = 0.2251, lr_0 = 1.2256e-04
Loss = 1.2539e-03, PNorm = 184.6899, GNorm = 0.0852, lr_0 = 1.2248e-04
Loss = 1.7501e-03, PNorm = 184.6911, GNorm = 0.1078, lr_0 = 1.2240e-04
Loss = 2.2019e-03, PNorm = 184.6932, GNorm = 0.0447, lr_0 = 1.2231e-04
Loss = 2.0189e-03, PNorm = 184.6941, GNorm = 0.0950, lr_0 = 1.2223e-04
Loss = 5.4025e-03, PNorm = 184.6959, GNorm = 0.0910, lr_0 = 1.2214e-04
Loss = 2.2901e-03, PNorm = 184.6970, GNorm = 0.0904, lr_0 = 1.2206e-04
Loss = 1.5843e-03, PNorm = 184.6972, GNorm = 0.0700, lr_0 = 1.2198e-04
Loss = 7.2426e-03, PNorm = 184.6962, GNorm = 0.0542, lr_0 = 1.2189e-04
Loss = 3.7991e-03, PNorm = 184.6978, GNorm = 0.0517, lr_0 = 1.2181e-04
Loss = 5.5733e-03, PNorm = 184.6994, GNorm = 0.0590, lr_0 = 1.2173e-04
Loss = 1.4880e-03, PNorm = 184.7014, GNorm = 0.0603, lr_0 = 1.2164e-04
Loss = 2.4209e-03, PNorm = 184.7032, GNorm = 0.0462, lr_0 = 1.2156e-04
Loss = 3.6273e-03, PNorm = 184.7046, GNorm = 0.1893, lr_0 = 1.2148e-04
Loss = 1.9268e-03, PNorm = 184.7068, GNorm = 0.0628, lr_0 = 1.2139e-04
Loss = 7.6486e-03, PNorm = 184.7076, GNorm = 0.1555, lr_0 = 1.2131e-04
Loss = 1.7767e-03, PNorm = 184.7097, GNorm = 0.0647, lr_0 = 1.2123e-04
Loss = 1.2974e-03, PNorm = 184.7111, GNorm = 0.1181, lr_0 = 1.2114e-04
Loss = 6.0076e-03, PNorm = 184.7123, GNorm = 0.1140, lr_0 = 1.2106e-04
Loss = 2.2682e-03, PNorm = 184.7153, GNorm = 0.0909, lr_0 = 1.2098e-04
Loss = 3.0326e-03, PNorm = 184.7188, GNorm = 0.0790, lr_0 = 1.2090e-04
Loss = 2.2280e-03, PNorm = 184.7222, GNorm = 0.0812, lr_0 = 1.2081e-04
Loss = 2.7043e-03, PNorm = 184.7243, GNorm = 0.1623, lr_0 = 1.2073e-04
Loss = 8.4527e-03, PNorm = 184.7265, GNorm = 0.0528, lr_0 = 1.2065e-04
Loss = 3.7729e-03, PNorm = 184.7280, GNorm = 0.1389, lr_0 = 1.2056e-04
Loss = 2.4865e-03, PNorm = 184.7297, GNorm = 0.0666, lr_0 = 1.2048e-04
Loss = 2.5573e-03, PNorm = 184.7315, GNorm = 0.0665, lr_0 = 1.2040e-04
Loss = 6.3252e-03, PNorm = 184.7332, GNorm = 0.0605, lr_0 = 1.2032e-04
Loss = 4.0793e-03, PNorm = 184.7344, GNorm = 0.1073, lr_0 = 1.2023e-04
Loss = 4.8958e-03, PNorm = 184.7358, GNorm = 0.1595, lr_0 = 1.2015e-04
Loss = 4.9055e-03, PNorm = 184.7375, GNorm = 0.0913, lr_0 = 1.2007e-04
Loss = 2.9059e-03, PNorm = 184.7390, GNorm = 0.1107, lr_0 = 1.1999e-04
Loss = 5.5576e-03, PNorm = 184.7404, GNorm = 0.1064, lr_0 = 1.1991e-04
Loss = 1.8066e-03, PNorm = 184.7422, GNorm = 0.1524, lr_0 = 1.1982e-04
Loss = 7.3203e-03, PNorm = 184.7449, GNorm = 0.0781, lr_0 = 1.1974e-04
Loss = 2.9205e-03, PNorm = 184.7468, GNorm = 0.0460, lr_0 = 1.1966e-04
Loss = 2.1372e-03, PNorm = 184.7489, GNorm = 0.0782, lr_0 = 1.1958e-04
Loss = 1.0373e-02, PNorm = 184.7498, GNorm = 0.2087, lr_0 = 1.1950e-04
Loss = 2.6337e-03, PNorm = 184.7524, GNorm = 0.1824, lr_0 = 1.1941e-04
Loss = 3.0798e-03, PNorm = 184.7539, GNorm = 0.2497, lr_0 = 1.1933e-04
Loss = 2.0737e-03, PNorm = 184.7547, GNorm = 0.1632, lr_0 = 1.1925e-04
Loss = 2.1940e-03, PNorm = 184.7565, GNorm = 0.5936, lr_0 = 1.1917e-04
Loss = 1.4070e-03, PNorm = 184.7586, GNorm = 0.0916, lr_0 = 1.1909e-04
Loss = 1.2764e-03, PNorm = 184.7606, GNorm = 0.0908, lr_0 = 1.1901e-04
Loss = 1.7982e-03, PNorm = 184.7626, GNorm = 0.0675, lr_0 = 1.1892e-04
Loss = 5.5361e-03, PNorm = 184.7638, GNorm = 0.1708, lr_0 = 1.1884e-04
Loss = 8.4618e-03, PNorm = 184.7657, GNorm = 0.1307, lr_0 = 1.1876e-04
Loss = 1.3189e-03, PNorm = 184.7667, GNorm = 0.0927, lr_0 = 1.1868e-04
Loss = 2.5916e-03, PNorm = 184.7679, GNorm = 0.0814, lr_0 = 1.1860e-04
Loss = 1.9250e-02, PNorm = 184.7720, GNorm = 2.6865, lr_0 = 1.1852e-04
Loss = 2.3699e-03, PNorm = 184.7750, GNorm = 0.1667, lr_0 = 1.1844e-04
Loss = 6.6744e-03, PNorm = 184.7756, GNorm = 0.5883, lr_0 = 1.1835e-04
Loss = 2.5369e-03, PNorm = 184.7780, GNorm = 0.0877, lr_0 = 1.1827e-04
Loss = 5.9198e-03, PNorm = 184.7808, GNorm = 0.3758, lr_0 = 1.1819e-04
Loss = 2.1307e-03, PNorm = 184.7821, GNorm = 0.1103, lr_0 = 1.1811e-04
Loss = 3.6610e-03, PNorm = 184.7847, GNorm = 0.0606, lr_0 = 1.1803e-04
Loss = 1.1041e-03, PNorm = 184.7866, GNorm = 0.1466, lr_0 = 1.1795e-04
Loss = 4.1228e-03, PNorm = 184.7891, GNorm = 0.1503, lr_0 = 1.1787e-04
Validation mae = 0.120903
Epoch 28
Loss = 7.9721e-03, PNorm = 184.7905, GNorm = 0.0559, lr_0 = 1.1779e-04
Loss = 3.4585e-03, PNorm = 184.7915, GNorm = 0.1603, lr_0 = 1.1771e-04
Loss = 2.7525e-03, PNorm = 184.7932, GNorm = 0.1204, lr_0 = 1.1763e-04
Loss = 5.9889e-03, PNorm = 184.7932, GNorm = 0.1382, lr_0 = 1.1755e-04
Loss = 2.9022e-03, PNorm = 184.7939, GNorm = 0.1060, lr_0 = 1.1747e-04
Loss = 3.2445e-03, PNorm = 184.7962, GNorm = 0.3641, lr_0 = 1.1739e-04
Loss = 3.4194e-03, PNorm = 184.7961, GNorm = 0.0649, lr_0 = 1.1730e-04
Loss = 2.3122e-03, PNorm = 184.7974, GNorm = 0.0546, lr_0 = 1.1722e-04
Loss = 1.6100e-03, PNorm = 184.7986, GNorm = 0.1306, lr_0 = 1.1714e-04
Loss = 6.1398e-03, PNorm = 184.7996, GNorm = 0.0876, lr_0 = 1.1706e-04
Loss = 2.2280e-03, PNorm = 184.8007, GNorm = 0.1038, lr_0 = 1.1698e-04
Loss = 1.0246e-03, PNorm = 184.8015, GNorm = 0.1138, lr_0 = 1.1690e-04
Loss = 2.6135e-03, PNorm = 184.8025, GNorm = 0.0529, lr_0 = 1.1682e-04
Loss = 2.1748e-03, PNorm = 184.8036, GNorm = 0.0809, lr_0 = 1.1674e-04
Loss = 2.7890e-03, PNorm = 184.8045, GNorm = 0.0577, lr_0 = 1.1666e-04
Loss = 3.8176e-03, PNorm = 184.8056, GNorm = 0.4529, lr_0 = 1.1658e-04
Loss = 1.2042e-03, PNorm = 184.8061, GNorm = 0.0419, lr_0 = 1.1650e-04
Loss = 2.1932e-03, PNorm = 184.8070, GNorm = 0.0727, lr_0 = 1.1642e-04
Loss = 3.3721e-03, PNorm = 184.8071, GNorm = 0.1265, lr_0 = 1.1634e-04
Loss = 1.4788e-03, PNorm = 184.8079, GNorm = 0.0767, lr_0 = 1.1626e-04
Loss = 1.4895e-03, PNorm = 184.8094, GNorm = 0.0909, lr_0 = 1.1618e-04
Loss = 2.5280e-03, PNorm = 184.8110, GNorm = 0.1291, lr_0 = 1.1611e-04
Loss = 2.9328e-03, PNorm = 184.8130, GNorm = 0.1703, lr_0 = 1.1603e-04
Loss = 1.6605e-03, PNorm = 184.8150, GNorm = 0.0746, lr_0 = 1.1595e-04
Loss = 2.2489e-03, PNorm = 184.8159, GNorm = 0.1283, lr_0 = 1.1587e-04
Loss = 1.7915e-03, PNorm = 184.8165, GNorm = 0.0923, lr_0 = 1.1579e-04
Loss = 1.3125e-03, PNorm = 184.8184, GNorm = 0.1092, lr_0 = 1.1571e-04
Loss = 3.2591e-03, PNorm = 184.8197, GNorm = 0.0684, lr_0 = 1.1563e-04
Loss = 3.5506e-03, PNorm = 184.8216, GNorm = 0.1630, lr_0 = 1.1555e-04
Loss = 1.3696e-03, PNorm = 184.8241, GNorm = 0.0478, lr_0 = 1.1547e-04
Loss = 2.4393e-03, PNorm = 184.8265, GNorm = 0.1331, lr_0 = 1.1539e-04
Loss = 2.9086e-03, PNorm = 184.8282, GNorm = 0.1019, lr_0 = 1.1531e-04
Loss = 1.9619e-03, PNorm = 184.8294, GNorm = 0.0404, lr_0 = 1.1523e-04
Loss = 4.2712e-03, PNorm = 184.8314, GNorm = 0.1829, lr_0 = 1.1515e-04
Loss = 1.4290e-03, PNorm = 184.8324, GNorm = 0.0701, lr_0 = 1.1508e-04
Loss = 4.0547e-03, PNorm = 184.8348, GNorm = 0.2191, lr_0 = 1.1500e-04
Loss = 2.0225e-03, PNorm = 184.8356, GNorm = 0.1106, lr_0 = 1.1492e-04
Loss = 2.3003e-03, PNorm = 184.8370, GNorm = 0.1041, lr_0 = 1.1484e-04
Loss = 1.3102e-03, PNorm = 184.8388, GNorm = 0.0546, lr_0 = 1.1476e-04
Loss = 2.9082e-03, PNorm = 184.8398, GNorm = 0.0641, lr_0 = 1.1468e-04
Loss = 1.4364e-03, PNorm = 184.8407, GNorm = 0.0668, lr_0 = 1.1460e-04
Loss = 1.2880e-03, PNorm = 184.8420, GNorm = 0.0892, lr_0 = 1.1452e-04
Loss = 1.5757e-03, PNorm = 184.8432, GNorm = 0.0580, lr_0 = 1.1445e-04
Loss = 1.7341e-03, PNorm = 184.8443, GNorm = 0.1356, lr_0 = 1.1437e-04
Loss = 1.0903e-02, PNorm = 184.8466, GNorm = 0.0859, lr_0 = 1.1429e-04
Loss = 3.5301e-03, PNorm = 184.8482, GNorm = 0.0716, lr_0 = 1.1421e-04
Loss = 1.5705e-03, PNorm = 184.8494, GNorm = 0.0922, lr_0 = 1.1413e-04
Loss = 2.9857e-03, PNorm = 184.8513, GNorm = 0.1464, lr_0 = 1.1405e-04
Loss = 2.5617e-03, PNorm = 184.8528, GNorm = 0.1886, lr_0 = 1.1398e-04
Loss = 1.1241e-03, PNorm = 184.8531, GNorm = 0.1070, lr_0 = 1.1390e-04
Loss = 2.1123e-03, PNorm = 184.8543, GNorm = 0.0536, lr_0 = 1.1382e-04
Loss = 1.6940e-02, PNorm = 184.8560, GNorm = 0.1810, lr_0 = 1.1374e-04
Loss = 3.1237e-03, PNorm = 184.8559, GNorm = 0.3544, lr_0 = 1.1366e-04
Loss = 2.4819e-03, PNorm = 184.8566, GNorm = 0.0870, lr_0 = 1.1359e-04
Loss = 2.1665e-03, PNorm = 184.8578, GNorm = 0.1202, lr_0 = 1.1351e-04
Loss = 4.1349e-03, PNorm = 184.8590, GNorm = 0.1461, lr_0 = 1.1343e-04
Loss = 4.0195e-03, PNorm = 184.8591, GNorm = 0.1270, lr_0 = 1.1335e-04
Loss = 1.5675e-03, PNorm = 184.8601, GNorm = 0.0719, lr_0 = 1.1328e-04
Loss = 1.5275e-03, PNorm = 184.8612, GNorm = 0.0520, lr_0 = 1.1320e-04
Loss = 2.3646e-03, PNorm = 184.8618, GNorm = 0.0321, lr_0 = 1.1312e-04
Loss = 1.7313e-03, PNorm = 184.8630, GNorm = 0.0508, lr_0 = 1.1304e-04
Loss = 9.5701e-03, PNorm = 184.8634, GNorm = 0.2679, lr_0 = 1.1297e-04
Loss = 1.0588e-02, PNorm = 184.8657, GNorm = 0.1244, lr_0 = 1.1289e-04
Loss = 1.5959e-03, PNorm = 184.8679, GNorm = 0.1092, lr_0 = 1.1281e-04
Loss = 1.3806e-03, PNorm = 184.8698, GNorm = 0.0861, lr_0 = 1.1273e-04
Loss = 1.4620e-03, PNorm = 184.8719, GNorm = 0.0475, lr_0 = 1.1266e-04
Loss = 2.3305e-03, PNorm = 184.8741, GNorm = 0.0935, lr_0 = 1.1258e-04
Loss = 1.4897e-03, PNorm = 184.8757, GNorm = 0.0329, lr_0 = 1.1250e-04
Loss = 1.4330e-03, PNorm = 184.8763, GNorm = 0.0542, lr_0 = 1.1243e-04
Loss = 2.4476e-03, PNorm = 184.8776, GNorm = 0.0925, lr_0 = 1.1235e-04
Loss = 3.3749e-03, PNorm = 184.8778, GNorm = 0.0747, lr_0 = 1.1227e-04
Loss = 1.4122e-03, PNorm = 184.8777, GNorm = 0.0450, lr_0 = 1.1219e-04
Loss = 4.0434e-03, PNorm = 184.8783, GNorm = 0.3022, lr_0 = 1.1212e-04
Loss = 4.3288e-03, PNorm = 184.8813, GNorm = 0.0513, lr_0 = 1.1204e-04
Loss = 2.6091e-03, PNorm = 184.8835, GNorm = 0.1799, lr_0 = 1.1196e-04
Loss = 2.1422e-03, PNorm = 184.8835, GNorm = 0.1007, lr_0 = 1.1189e-04
Loss = 1.9326e-03, PNorm = 184.8839, GNorm = 0.0923, lr_0 = 1.1181e-04
Loss = 1.9538e-03, PNorm = 184.8842, GNorm = 0.0404, lr_0 = 1.1173e-04
Loss = 5.7893e-03, PNorm = 184.8852, GNorm = 0.4228, lr_0 = 1.1166e-04
Loss = 2.0116e-03, PNorm = 184.8856, GNorm = 0.0482, lr_0 = 1.1158e-04
Loss = 2.3444e-03, PNorm = 184.8870, GNorm = 0.1082, lr_0 = 1.1150e-04
Loss = 4.5227e-03, PNorm = 184.8893, GNorm = 0.1379, lr_0 = 1.1143e-04
Loss = 1.4106e-03, PNorm = 184.8910, GNorm = 0.0539, lr_0 = 1.1135e-04
Loss = 2.1752e-03, PNorm = 184.8919, GNorm = 0.0913, lr_0 = 1.1128e-04
Loss = 5.0805e-03, PNorm = 184.8923, GNorm = 0.0751, lr_0 = 1.1120e-04
Loss = 3.5024e-03, PNorm = 184.8933, GNorm = 0.0833, lr_0 = 1.1112e-04
Loss = 5.3431e-03, PNorm = 184.8946, GNorm = 0.0595, lr_0 = 1.1105e-04
Loss = 1.2482e-03, PNorm = 184.8957, GNorm = 0.0796, lr_0 = 1.1097e-04
Loss = 1.0838e-03, PNorm = 184.8971, GNorm = 0.0942, lr_0 = 1.1089e-04
Loss = 2.8538e-03, PNorm = 184.8986, GNorm = 0.0771, lr_0 = 1.1082e-04
Loss = 1.8401e-03, PNorm = 184.8995, GNorm = 0.0548, lr_0 = 1.1074e-04
Loss = 2.0460e-03, PNorm = 184.8995, GNorm = 0.0384, lr_0 = 1.1067e-04
Loss = 2.9348e-03, PNorm = 184.9005, GNorm = 0.0872, lr_0 = 1.1059e-04
Loss = 1.8470e-03, PNorm = 184.9024, GNorm = 0.0795, lr_0 = 1.1052e-04
Loss = 2.2741e-03, PNorm = 184.9042, GNorm = 0.0694, lr_0 = 1.1044e-04
Loss = 4.4483e-03, PNorm = 184.9069, GNorm = 0.1566, lr_0 = 1.1036e-04
Loss = 1.6721e-03, PNorm = 184.9087, GNorm = 0.0398, lr_0 = 1.1029e-04
Loss = 3.8250e-03, PNorm = 184.9102, GNorm = 0.5095, lr_0 = 1.1021e-04
Loss = 3.2172e-03, PNorm = 184.9111, GNorm = 0.0872, lr_0 = 1.1014e-04
Loss = 2.8394e-03, PNorm = 184.9127, GNorm = 0.2951, lr_0 = 1.1006e-04
Loss = 3.7748e-03, PNorm = 184.9155, GNorm = 0.1234, lr_0 = 1.0999e-04
Loss = 3.8811e-03, PNorm = 184.9170, GNorm = 0.1232, lr_0 = 1.0991e-04
Loss = 1.0609e-03, PNorm = 184.9181, GNorm = 0.0729, lr_0 = 1.0984e-04
Loss = 1.4861e-03, PNorm = 184.9192, GNorm = 0.1515, lr_0 = 1.0976e-04
Loss = 9.6722e-03, PNorm = 184.9194, GNorm = 0.1364, lr_0 = 1.0969e-04
Loss = 5.1157e-03, PNorm = 184.9203, GNorm = 0.1176, lr_0 = 1.0961e-04
Loss = 3.7483e-03, PNorm = 184.9215, GNorm = 0.0458, lr_0 = 1.0954e-04
Loss = 5.3939e-03, PNorm = 184.9225, GNorm = 0.2123, lr_0 = 1.0946e-04
Loss = 4.1724e-03, PNorm = 184.9239, GNorm = 0.0455, lr_0 = 1.0939e-04
Loss = 3.7274e-03, PNorm = 184.9261, GNorm = 0.0645, lr_0 = 1.0931e-04
Loss = 2.6067e-03, PNorm = 184.9276, GNorm = 0.0692, lr_0 = 1.0924e-04
Loss = 3.0397e-03, PNorm = 184.9290, GNorm = 0.0340, lr_0 = 1.0916e-04
Loss = 1.3878e-03, PNorm = 184.9307, GNorm = 0.1465, lr_0 = 1.0909e-04
Loss = 3.8179e-03, PNorm = 184.9330, GNorm = 0.0651, lr_0 = 1.0901e-04
Loss = 1.1608e-03, PNorm = 184.9349, GNorm = 0.0540, lr_0 = 1.0894e-04
Loss = 4.7263e-03, PNorm = 184.9352, GNorm = 0.1468, lr_0 = 1.0886e-04
Loss = 3.3763e-03, PNorm = 184.9354, GNorm = 0.0964, lr_0 = 1.0879e-04
Loss = 4.1003e-03, PNorm = 184.9365, GNorm = 0.1399, lr_0 = 1.0871e-04
Loss = 3.9565e-03, PNorm = 184.9374, GNorm = 0.1089, lr_0 = 1.0864e-04
Loss = 3.2961e-03, PNorm = 184.9386, GNorm = 0.1544, lr_0 = 1.0856e-04
Validation mae = 0.120869
Epoch 29
Loss = 1.3597e-03, PNorm = 184.9398, GNorm = 0.0668, lr_0 = 1.0849e-04
Loss = 2.4971e-03, PNorm = 184.9402, GNorm = 0.1001, lr_0 = 1.0841e-04
Loss = 1.0786e-03, PNorm = 184.9414, GNorm = 0.0897, lr_0 = 1.0834e-04
Loss = 9.3432e-04, PNorm = 184.9425, GNorm = 0.0516, lr_0 = 1.0827e-04
Loss = 1.8695e-03, PNorm = 184.9436, GNorm = 0.1073, lr_0 = 1.0819e-04
Loss = 1.0032e-03, PNorm = 184.9447, GNorm = 0.1052, lr_0 = 1.0812e-04
Loss = 3.1108e-03, PNorm = 184.9456, GNorm = 0.0608, lr_0 = 1.0804e-04
Loss = 1.9635e-03, PNorm = 184.9465, GNorm = 0.1755, lr_0 = 1.0797e-04
Loss = 9.7340e-04, PNorm = 184.9478, GNorm = 0.1252, lr_0 = 1.0790e-04
Loss = 2.2269e-03, PNorm = 184.9502, GNorm = 0.0430, lr_0 = 1.0782e-04
Loss = 8.9106e-04, PNorm = 184.9513, GNorm = 0.0548, lr_0 = 1.0775e-04
Loss = 8.9319e-04, PNorm = 184.9522, GNorm = 0.0855, lr_0 = 1.0767e-04
Loss = 8.9436e-04, PNorm = 184.9528, GNorm = 0.0324, lr_0 = 1.0760e-04
Loss = 1.8136e-03, PNorm = 184.9543, GNorm = 0.1716, lr_0 = 1.0753e-04
Loss = 1.2628e-03, PNorm = 184.9564, GNorm = 0.0554, lr_0 = 1.0745e-04
Loss = 4.9978e-03, PNorm = 184.9576, GNorm = 0.0665, lr_0 = 1.0738e-04
Loss = 2.5844e-03, PNorm = 184.9586, GNorm = 0.0735, lr_0 = 1.0731e-04
Loss = 1.1864e-03, PNorm = 184.9604, GNorm = 0.0966, lr_0 = 1.0723e-04
Loss = 3.4428e-03, PNorm = 184.9615, GNorm = 0.0619, lr_0 = 1.0716e-04
Loss = 1.3533e-03, PNorm = 184.9632, GNorm = 0.0317, lr_0 = 1.0709e-04
Loss = 3.4709e-03, PNorm = 184.9640, GNorm = 0.1188, lr_0 = 1.0701e-04
Loss = 2.4280e-03, PNorm = 184.9648, GNorm = 0.1171, lr_0 = 1.0694e-04
Loss = 3.7084e-03, PNorm = 184.9648, GNorm = 0.1047, lr_0 = 1.0687e-04
Loss = 3.4884e-03, PNorm = 184.9650, GNorm = 0.1327, lr_0 = 1.0679e-04
Loss = 1.3734e-03, PNorm = 184.9660, GNorm = 0.1148, lr_0 = 1.0672e-04
Loss = 4.6733e-03, PNorm = 184.9671, GNorm = 0.1438, lr_0 = 1.0665e-04
Loss = 1.0544e-03, PNorm = 184.9686, GNorm = 0.0404, lr_0 = 1.0657e-04
Loss = 4.4078e-03, PNorm = 184.9711, GNorm = 0.0636, lr_0 = 1.0650e-04
Loss = 1.9991e-03, PNorm = 184.9730, GNorm = 0.1170, lr_0 = 1.0643e-04
Loss = 1.4546e-03, PNorm = 184.9743, GNorm = 0.0790, lr_0 = 1.0635e-04
Loss = 2.2893e-03, PNorm = 184.9752, GNorm = 0.2441, lr_0 = 1.0628e-04
Loss = 4.7561e-03, PNorm = 184.9768, GNorm = 0.1216, lr_0 = 1.0621e-04
Loss = 2.0861e-03, PNorm = 184.9777, GNorm = 0.0812, lr_0 = 1.0614e-04
Loss = 3.4102e-03, PNorm = 184.9777, GNorm = 0.3073, lr_0 = 1.0606e-04
Loss = 9.2519e-04, PNorm = 184.9782, GNorm = 0.0641, lr_0 = 1.0599e-04
Loss = 3.3239e-03, PNorm = 184.9779, GNorm = 0.1967, lr_0 = 1.0592e-04
Loss = 1.9559e-03, PNorm = 184.9786, GNorm = 0.1311, lr_0 = 1.0585e-04
Loss = 2.2615e-03, PNorm = 184.9801, GNorm = 0.0984, lr_0 = 1.0577e-04
Loss = 1.3457e-03, PNorm = 184.9805, GNorm = 0.0645, lr_0 = 1.0570e-04
Loss = 1.5657e-03, PNorm = 184.9823, GNorm = 0.0688, lr_0 = 1.0563e-04
Loss = 1.0874e-03, PNorm = 184.9836, GNorm = 0.0876, lr_0 = 1.0556e-04
Loss = 3.1605e-03, PNorm = 184.9843, GNorm = 0.3698, lr_0 = 1.0548e-04
Loss = 3.6554e-03, PNorm = 184.9854, GNorm = 0.0831, lr_0 = 1.0541e-04
Loss = 2.3987e-03, PNorm = 184.9866, GNorm = 0.1725, lr_0 = 1.0534e-04
Loss = 2.7155e-03, PNorm = 184.9876, GNorm = 0.0882, lr_0 = 1.0527e-04
Loss = 5.4727e-03, PNorm = 184.9890, GNorm = 0.0354, lr_0 = 1.0519e-04
Loss = 4.6035e-03, PNorm = 184.9899, GNorm = 0.2114, lr_0 = 1.0512e-04
Loss = 1.9141e-03, PNorm = 184.9917, GNorm = 0.0932, lr_0 = 1.0505e-04
Loss = 1.6849e-03, PNorm = 184.9925, GNorm = 0.0764, lr_0 = 1.0498e-04
Loss = 2.3825e-03, PNorm = 184.9915, GNorm = 0.1168, lr_0 = 1.0491e-04
Loss = 1.4372e-03, PNorm = 184.9914, GNorm = 0.0511, lr_0 = 1.0483e-04
Loss = 1.2001e-03, PNorm = 184.9916, GNorm = 0.0902, lr_0 = 1.0476e-04
Loss = 6.0184e-03, PNorm = 184.9926, GNorm = 0.0937, lr_0 = 1.0469e-04
Loss = 3.1634e-03, PNorm = 184.9926, GNorm = 0.0697, lr_0 = 1.0462e-04
Loss = 3.8050e-03, PNorm = 184.9935, GNorm = 0.0842, lr_0 = 1.0455e-04
Loss = 5.9441e-03, PNorm = 184.9949, GNorm = 0.0425, lr_0 = 1.0448e-04
Loss = 1.7059e-03, PNorm = 184.9961, GNorm = 0.0623, lr_0 = 1.0440e-04
Loss = 8.8183e-04, PNorm = 184.9973, GNorm = 0.0442, lr_0 = 1.0433e-04
Loss = 1.6773e-03, PNorm = 184.9992, GNorm = 0.0624, lr_0 = 1.0426e-04
Loss = 6.9575e-03, PNorm = 185.0006, GNorm = 0.1952, lr_0 = 1.0419e-04
Loss = 3.1685e-03, PNorm = 185.0012, GNorm = 0.1295, lr_0 = 1.0412e-04
Loss = 1.4704e-03, PNorm = 185.0026, GNorm = 0.0597, lr_0 = 1.0405e-04
Loss = 4.0956e-03, PNorm = 185.0040, GNorm = 0.0448, lr_0 = 1.0398e-04
Loss = 1.4714e-03, PNorm = 185.0050, GNorm = 0.1123, lr_0 = 1.0391e-04
Loss = 2.5250e-03, PNorm = 185.0049, GNorm = 0.1917, lr_0 = 1.0383e-04
Loss = 8.5581e-04, PNorm = 185.0053, GNorm = 0.1033, lr_0 = 1.0376e-04
Loss = 1.8932e-03, PNorm = 185.0069, GNorm = 0.0829, lr_0 = 1.0369e-04
Loss = 3.2835e-03, PNorm = 185.0077, GNorm = 0.0875, lr_0 = 1.0362e-04
Loss = 1.4265e-03, PNorm = 185.0084, GNorm = 0.1278, lr_0 = 1.0355e-04
Loss = 9.3031e-04, PNorm = 185.0092, GNorm = 0.1454, lr_0 = 1.0348e-04
Loss = 3.6324e-03, PNorm = 185.0107, GNorm = 0.0732, lr_0 = 1.0341e-04
Loss = 9.7833e-04, PNorm = 185.0120, GNorm = 0.1316, lr_0 = 1.0334e-04
Loss = 1.4575e-03, PNorm = 185.0129, GNorm = 0.1424, lr_0 = 1.0327e-04
Loss = 1.2119e-03, PNorm = 185.0142, GNorm = 0.1523, lr_0 = 1.0320e-04
Loss = 2.5299e-03, PNorm = 185.0150, GNorm = 0.1568, lr_0 = 1.0312e-04
Loss = 3.0200e-03, PNorm = 185.0166, GNorm = 0.0799, lr_0 = 1.0305e-04
Loss = 1.2380e-03, PNorm = 185.0183, GNorm = 0.0796, lr_0 = 1.0298e-04
Loss = 3.3303e-03, PNorm = 185.0198, GNorm = 0.1048, lr_0 = 1.0291e-04
Loss = 1.0067e-02, PNorm = 185.0217, GNorm = 0.0647, lr_0 = 1.0284e-04
Loss = 1.7340e-03, PNorm = 185.0224, GNorm = 0.0807, lr_0 = 1.0277e-04
Loss = 1.6494e-03, PNorm = 185.0233, GNorm = 0.1755, lr_0 = 1.0270e-04
Loss = 7.2432e-03, PNorm = 185.0256, GNorm = 0.0544, lr_0 = 1.0263e-04
Loss = 4.9907e-03, PNorm = 185.0260, GNorm = 0.2370, lr_0 = 1.0256e-04
Loss = 1.9325e-03, PNorm = 185.0266, GNorm = 0.0331, lr_0 = 1.0249e-04
Loss = 1.3794e-03, PNorm = 185.0272, GNorm = 0.1366, lr_0 = 1.0242e-04
Loss = 2.5321e-03, PNorm = 185.0288, GNorm = 0.0606, lr_0 = 1.0235e-04
Loss = 6.8170e-03, PNorm = 185.0293, GNorm = 1.2923, lr_0 = 1.0228e-04
Loss = 2.0919e-03, PNorm = 185.0295, GNorm = 0.0882, lr_0 = 1.0221e-04
Loss = 1.9790e-03, PNorm = 185.0306, GNorm = 0.2671, lr_0 = 1.0214e-04
Loss = 1.4946e-03, PNorm = 185.0313, GNorm = 0.0627, lr_0 = 1.0207e-04
Loss = 2.2460e-03, PNorm = 185.0329, GNorm = 0.2534, lr_0 = 1.0200e-04
Loss = 7.6464e-03, PNorm = 185.0341, GNorm = 0.0605, lr_0 = 1.0193e-04
Loss = 2.0047e-03, PNorm = 185.0355, GNorm = 0.0637, lr_0 = 1.0186e-04
Loss = 3.4308e-03, PNorm = 185.0365, GNorm = 0.0524, lr_0 = 1.0179e-04
Loss = 2.3566e-03, PNorm = 185.0387, GNorm = 0.0920, lr_0 = 1.0172e-04
Loss = 5.7678e-03, PNorm = 185.0400, GNorm = 0.0700, lr_0 = 1.0165e-04
Loss = 8.6453e-03, PNorm = 185.0398, GNorm = 0.0763, lr_0 = 1.0158e-04
Loss = 4.4018e-03, PNorm = 185.0408, GNorm = 0.0484, lr_0 = 1.0151e-04
Loss = 1.5651e-03, PNorm = 185.0425, GNorm = 0.2756, lr_0 = 1.0144e-04
Loss = 3.5216e-03, PNorm = 185.0457, GNorm = 0.0468, lr_0 = 1.0137e-04
Loss = 1.3926e-03, PNorm = 185.0475, GNorm = 0.0766, lr_0 = 1.0130e-04
Loss = 5.7983e-03, PNorm = 185.0492, GNorm = 0.1181, lr_0 = 1.0123e-04
Loss = 1.5582e-03, PNorm = 185.0526, GNorm = 0.1565, lr_0 = 1.0116e-04
Loss = 3.0068e-03, PNorm = 185.0553, GNorm = 0.0900, lr_0 = 1.0110e-04
Loss = 1.9084e-03, PNorm = 185.0566, GNorm = 0.0516, lr_0 = 1.0103e-04
Loss = 2.0544e-03, PNorm = 185.0585, GNorm = 0.1196, lr_0 = 1.0096e-04
Loss = 2.8431e-03, PNorm = 185.0596, GNorm = 0.0891, lr_0 = 1.0089e-04
Loss = 3.2196e-03, PNorm = 185.0595, GNorm = 0.0405, lr_0 = 1.0082e-04
Loss = 6.4147e-03, PNorm = 185.0595, GNorm = 0.0749, lr_0 = 1.0075e-04
Loss = 2.3625e-03, PNorm = 185.0600, GNorm = 0.0753, lr_0 = 1.0068e-04
Loss = 1.4859e-03, PNorm = 185.0613, GNorm = 0.0888, lr_0 = 1.0061e-04
Loss = 4.1062e-03, PNorm = 185.0628, GNorm = 0.7518, lr_0 = 1.0054e-04
Loss = 7.2124e-03, PNorm = 185.0656, GNorm = 0.0810, lr_0 = 1.0047e-04
Loss = 2.5198e-03, PNorm = 185.0678, GNorm = 0.0740, lr_0 = 1.0041e-04
Loss = 1.0031e-03, PNorm = 185.0697, GNorm = 0.0858, lr_0 = 1.0034e-04
Loss = 7.5729e-03, PNorm = 185.0709, GNorm = 0.0343, lr_0 = 1.0027e-04
Loss = 2.4308e-03, PNorm = 185.0725, GNorm = 0.1094, lr_0 = 1.0020e-04
Loss = 3.8765e-03, PNorm = 185.0741, GNorm = 0.1207, lr_0 = 1.0013e-04
Loss = 1.2867e-03, PNorm = 185.0758, GNorm = 0.0671, lr_0 = 1.0006e-04
Loss = 5.4230e-03, PNorm = 185.0777, GNorm = 0.0410, lr_0 = 1.0000e-04
Validation mae = 0.120588
Model 0 best validation mae = 0.120588 on epoch 29
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.119693
Ensemble test mae = 0.119693
Fold 9
Splitting data with seed 9
Total size = 60,000 | train size = 60,000 | val size = 20,000 | test size = 20,000
Fitting scaler
Building model 0
MoleculeModel(
  (encoder): MPN()
  (ffn): Sequential(
    (0): Dropout(p=0.0, inplace=False)
    (1): Linear(in_features=2048, out_features=2100, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.0, inplace=False)
    (4): Linear(in_features=2100, out_features=2100, bias=True)
    (5): ReLU()
    (6): Dropout(p=0.0, inplace=False)
    (7): Linear(in_features=2100, out_features=1, bias=True)
  )
)
Number of parameters = 8,717,101
Moving model to cuda
Epoch 0
Loss = 9.5710e-01, PNorm = 64.6282, GNorm = 1.7146, lr_0 = 1.0413e-04
Loss = 8.5526e-01, PNorm = 64.6391, GNorm = 1.9740, lr_0 = 1.0788e-04
Loss = 7.7196e-01, PNorm = 64.6496, GNorm = 1.9876, lr_0 = 1.1163e-04
Loss = 6.9933e-01, PNorm = 64.6587, GNorm = 3.2122, lr_0 = 1.1537e-04
Loss = 6.8247e-01, PNorm = 64.6678, GNorm = 2.2702, lr_0 = 1.1913e-04
Loss = 7.1826e-01, PNorm = 64.6788, GNorm = 2.3286, lr_0 = 1.2287e-04
Loss = 7.5356e-01, PNorm = 64.6877, GNorm = 2.4394, lr_0 = 1.2663e-04
Loss = 6.8055e-01, PNorm = 64.6982, GNorm = 2.4577, lr_0 = 1.3038e-04
Loss = 6.6247e-01, PNorm = 64.7093, GNorm = 2.1363, lr_0 = 1.3413e-04
Loss = 7.1106e-01, PNorm = 64.7197, GNorm = 4.2488, lr_0 = 1.3788e-04
Loss = 6.4443e-01, PNorm = 64.7306, GNorm = 2.1529, lr_0 = 1.4163e-04
Loss = 5.8469e-01, PNorm = 64.7387, GNorm = 2.7534, lr_0 = 1.4537e-04
Loss = 6.3990e-01, PNorm = 64.7493, GNorm = 2.0995, lr_0 = 1.4913e-04
Loss = 7.4155e-01, PNorm = 64.7602, GNorm = 2.2163, lr_0 = 1.5288e-04
Loss = 7.1890e-01, PNorm = 64.7731, GNorm = 2.0036, lr_0 = 1.5662e-04
Loss = 6.0646e-01, PNorm = 64.7841, GNorm = 3.5334, lr_0 = 1.6038e-04
Loss = 6.1912e-01, PNorm = 64.7950, GNorm = 2.5730, lr_0 = 1.6412e-04
Loss = 6.3729e-01, PNorm = 64.8088, GNorm = 2.7845, lr_0 = 1.6788e-04
Loss = 6.1515e-01, PNorm = 64.8226, GNorm = 2.6807, lr_0 = 1.7163e-04
Loss = 5.3090e-01, PNorm = 64.8361, GNorm = 3.1727, lr_0 = 1.7538e-04
Loss = 6.0560e-01, PNorm = 64.8502, GNorm = 1.8135, lr_0 = 1.7913e-04
Loss = 6.5816e-01, PNorm = 64.8637, GNorm = 1.8229, lr_0 = 1.8288e-04
Loss = 5.3130e-01, PNorm = 64.8786, GNorm = 2.0065, lr_0 = 1.8662e-04
Loss = 5.5642e-01, PNorm = 64.8925, GNorm = 1.8904, lr_0 = 1.9038e-04
Loss = 5.6586e-01, PNorm = 64.9067, GNorm = 1.7993, lr_0 = 1.9413e-04
Loss = 6.0615e-01, PNorm = 64.9203, GNorm = 1.9572, lr_0 = 1.9788e-04
Loss = 6.0074e-01, PNorm = 64.9358, GNorm = 2.3988, lr_0 = 2.0163e-04
Loss = 5.1691e-01, PNorm = 64.9525, GNorm = 1.8301, lr_0 = 2.0537e-04
Loss = 6.2514e-01, PNorm = 64.9699, GNorm = 2.0834, lr_0 = 2.0913e-04
Loss = 5.9888e-01, PNorm = 64.9859, GNorm = 2.3071, lr_0 = 2.1288e-04
Loss = 5.2323e-01, PNorm = 65.0038, GNorm = 2.0065, lr_0 = 2.1663e-04
Loss = 5.3703e-01, PNorm = 65.0223, GNorm = 1.7775, lr_0 = 2.2038e-04
Loss = 5.4769e-01, PNorm = 65.0403, GNorm = 1.5750, lr_0 = 2.2412e-04
Loss = 6.2774e-01, PNorm = 65.0593, GNorm = 2.8459, lr_0 = 2.2787e-04
Loss = 5.8297e-01, PNorm = 65.0800, GNorm = 1.9636, lr_0 = 2.3163e-04
Loss = 4.8125e-01, PNorm = 65.0982, GNorm = 1.8131, lr_0 = 2.3538e-04
Loss = 4.7988e-01, PNorm = 65.1161, GNorm = 1.7754, lr_0 = 2.3913e-04
Loss = 5.0430e-01, PNorm = 65.1383, GNorm = 1.7430, lr_0 = 2.4288e-04
Loss = 5.4731e-01, PNorm = 65.1581, GNorm = 2.1311, lr_0 = 2.4662e-04
Loss = 5.5566e-01, PNorm = 65.1779, GNorm = 1.8844, lr_0 = 2.5038e-04
Loss = 4.9968e-01, PNorm = 65.2002, GNorm = 1.4967, lr_0 = 2.5413e-04
Loss = 5.5530e-01, PNorm = 65.2236, GNorm = 2.2120, lr_0 = 2.5788e-04
Loss = 5.5728e-01, PNorm = 65.2485, GNorm = 1.2677, lr_0 = 2.6163e-04
Loss = 5.4421e-01, PNorm = 65.2748, GNorm = 1.7115, lr_0 = 2.6537e-04
Loss = 5.9376e-01, PNorm = 65.3005, GNorm = 2.0430, lr_0 = 2.6912e-04
Loss = 6.0789e-01, PNorm = 65.3245, GNorm = 2.1120, lr_0 = 2.7288e-04
Loss = 4.8793e-01, PNorm = 65.3542, GNorm = 1.8742, lr_0 = 2.7663e-04
Loss = 5.5815e-01, PNorm = 65.3788, GNorm = 1.4051, lr_0 = 2.8038e-04
Loss = 5.4497e-01, PNorm = 65.4087, GNorm = 1.6177, lr_0 = 2.8413e-04
Loss = 5.2192e-01, PNorm = 65.4374, GNorm = 1.5552, lr_0 = 2.8787e-04
Loss = 5.3912e-01, PNorm = 65.4657, GNorm = 2.0153, lr_0 = 2.9163e-04
Loss = 4.7481e-01, PNorm = 65.4924, GNorm = 1.8269, lr_0 = 2.9538e-04
Loss = 5.2911e-01, PNorm = 65.5214, GNorm = 2.0151, lr_0 = 2.9913e-04
Loss = 5.2950e-01, PNorm = 65.5498, GNorm = 1.5164, lr_0 = 3.0288e-04
Loss = 5.1154e-01, PNorm = 65.5787, GNorm = 1.5935, lr_0 = 3.0662e-04
Loss = 5.4448e-01, PNorm = 65.6072, GNorm = 1.7131, lr_0 = 3.1037e-04
Loss = 5.1562e-01, PNorm = 65.6407, GNorm = 1.8537, lr_0 = 3.1413e-04
Loss = 5.1821e-01, PNorm = 65.6726, GNorm = 1.4231, lr_0 = 3.1788e-04
Loss = 4.9916e-01, PNorm = 65.7070, GNorm = 1.7462, lr_0 = 3.2163e-04
Loss = 5.1668e-01, PNorm = 65.7362, GNorm = 1.5936, lr_0 = 3.2538e-04
Loss = 5.5691e-01, PNorm = 65.7704, GNorm = 1.4962, lr_0 = 3.2912e-04
Loss = 5.8799e-01, PNorm = 65.8055, GNorm = 2.1146, lr_0 = 3.3288e-04
Loss = 5.7652e-01, PNorm = 65.8408, GNorm = 1.9794, lr_0 = 3.3663e-04
Loss = 5.3511e-01, PNorm = 65.8770, GNorm = 1.4756, lr_0 = 3.4038e-04
Loss = 5.7006e-01, PNorm = 65.9205, GNorm = 1.2109, lr_0 = 3.4413e-04
Loss = 5.3822e-01, PNorm = 65.9599, GNorm = 1.8322, lr_0 = 3.4787e-04
Loss = 4.9663e-01, PNorm = 65.9952, GNorm = 1.2862, lr_0 = 3.5162e-04
Loss = 4.8780e-01, PNorm = 66.0295, GNorm = 1.5370, lr_0 = 3.5538e-04
Loss = 5.2721e-01, PNorm = 66.0634, GNorm = 1.1841, lr_0 = 3.5913e-04
Loss = 5.3937e-01, PNorm = 66.0968, GNorm = 1.6674, lr_0 = 3.6288e-04
Loss = 5.9702e-01, PNorm = 66.1324, GNorm = 1.4716, lr_0 = 3.6662e-04
Loss = 5.3615e-01, PNorm = 66.1703, GNorm = 1.3733, lr_0 = 3.7037e-04
Loss = 5.0704e-01, PNorm = 66.2088, GNorm = 1.1100, lr_0 = 3.7413e-04
Loss = 5.5201e-01, PNorm = 66.2430, GNorm = 1.3613, lr_0 = 3.7788e-04
Loss = 5.4223e-01, PNorm = 66.2850, GNorm = 1.4724, lr_0 = 3.8163e-04
Loss = 5.2369e-01, PNorm = 66.3273, GNorm = 1.3038, lr_0 = 3.8537e-04
Loss = 4.9663e-01, PNorm = 66.3666, GNorm = 1.0966, lr_0 = 3.8912e-04
Loss = 5.6262e-01, PNorm = 66.4085, GNorm = 1.3472, lr_0 = 3.9287e-04
Loss = 5.4088e-01, PNorm = 66.4545, GNorm = 1.5922, lr_0 = 3.9663e-04
Loss = 4.4745e-01, PNorm = 66.4987, GNorm = 2.3289, lr_0 = 4.0038e-04
Loss = 6.1553e-01, PNorm = 66.5441, GNorm = 1.1749, lr_0 = 4.0413e-04
Loss = 4.9852e-01, PNorm = 66.5948, GNorm = 2.1153, lr_0 = 4.0787e-04
Loss = 5.2874e-01, PNorm = 66.6374, GNorm = 1.4590, lr_0 = 4.1162e-04
Loss = 5.1253e-01, PNorm = 66.6852, GNorm = 1.3633, lr_0 = 4.1537e-04
Loss = 5.7256e-01, PNorm = 66.7360, GNorm = 1.7816, lr_0 = 4.1913e-04
Loss = 4.5358e-01, PNorm = 66.7817, GNorm = 1.3539, lr_0 = 4.2288e-04
Loss = 5.0067e-01, PNorm = 66.8256, GNorm = 1.7655, lr_0 = 4.2662e-04
Loss = 5.6294e-01, PNorm = 66.8714, GNorm = 1.0446, lr_0 = 4.3037e-04
Loss = 5.0531e-01, PNorm = 66.9175, GNorm = 1.7142, lr_0 = 4.3412e-04
Loss = 4.9985e-01, PNorm = 66.9709, GNorm = 1.7619, lr_0 = 4.3788e-04
Loss = 5.0924e-01, PNorm = 67.0181, GNorm = 1.2648, lr_0 = 4.4163e-04
Loss = 5.0172e-01, PNorm = 67.0656, GNorm = 1.2110, lr_0 = 4.4538e-04
Loss = 5.2928e-01, PNorm = 67.1160, GNorm = 1.5159, lr_0 = 4.4912e-04
Loss = 4.8920e-01, PNorm = 67.1668, GNorm = 1.2882, lr_0 = 4.5287e-04
Loss = 5.4824e-01, PNorm = 67.2174, GNorm = 1.6348, lr_0 = 4.5662e-04
Loss = 5.7393e-01, PNorm = 67.2695, GNorm = 1.6306, lr_0 = 4.6038e-04
Loss = 4.4151e-01, PNorm = 67.3222, GNorm = 1.2088, lr_0 = 4.6413e-04
Loss = 5.4318e-01, PNorm = 67.3716, GNorm = 1.9838, lr_0 = 4.6787e-04
Loss = 4.9161e-01, PNorm = 67.4235, GNorm = 1.6141, lr_0 = 4.7162e-04
Loss = 4.5319e-01, PNorm = 67.4726, GNorm = 1.1287, lr_0 = 4.7537e-04
Loss = 5.4438e-01, PNorm = 67.5185, GNorm = 1.2334, lr_0 = 4.7913e-04
Loss = 4.5451e-01, PNorm = 67.5719, GNorm = 1.3126, lr_0 = 4.8288e-04
Loss = 5.6020e-01, PNorm = 67.6186, GNorm = 1.4684, lr_0 = 4.8663e-04
Loss = 4.8801e-01, PNorm = 67.6730, GNorm = 1.3144, lr_0 = 4.9038e-04
Loss = 4.8916e-01, PNorm = 67.7224, GNorm = 1.4007, lr_0 = 4.9412e-04
Loss = 4.7371e-01, PNorm = 67.7750, GNorm = 1.4175, lr_0 = 4.9788e-04
Loss = 5.2909e-01, PNorm = 67.8355, GNorm = 1.1369, lr_0 = 5.0163e-04
Loss = 5.0543e-01, PNorm = 67.8957, GNorm = 1.2320, lr_0 = 5.0538e-04
Loss = 4.9961e-01, PNorm = 67.9625, GNorm = 1.2912, lr_0 = 5.0913e-04
Loss = 5.3637e-01, PNorm = 68.0273, GNorm = 2.0396, lr_0 = 5.1287e-04
Loss = 5.0144e-01, PNorm = 68.0923, GNorm = 1.1034, lr_0 = 5.1663e-04
Loss = 5.1101e-01, PNorm = 68.1606, GNorm = 1.5989, lr_0 = 5.2038e-04
Loss = 4.7813e-01, PNorm = 68.2150, GNorm = 1.0179, lr_0 = 5.2413e-04
Loss = 5.7562e-01, PNorm = 68.2772, GNorm = 1.4349, lr_0 = 5.2788e-04
Loss = 4.8122e-01, PNorm = 68.3367, GNorm = 1.2918, lr_0 = 5.3162e-04
Loss = 4.7686e-01, PNorm = 68.3978, GNorm = 1.0054, lr_0 = 5.3538e-04
Loss = 4.6454e-01, PNorm = 68.4609, GNorm = 1.5532, lr_0 = 5.3912e-04
Loss = 5.4311e-01, PNorm = 68.5233, GNorm = 1.5487, lr_0 = 5.4288e-04
Loss = 5.1577e-01, PNorm = 68.5894, GNorm = 1.4499, lr_0 = 5.4663e-04
Loss = 4.4226e-01, PNorm = 68.6591, GNorm = 1.1452, lr_0 = 5.5038e-04
Validation mae = 0.129268
Epoch 1
Loss = 3.6465e-01, PNorm = 68.7326, GNorm = 1.2882, lr_0 = 5.5413e-04
Loss = 3.1583e-01, PNorm = 68.8024, GNorm = 1.1093, lr_0 = 5.5787e-04
Loss = 3.3589e-01, PNorm = 68.8697, GNorm = 1.2467, lr_0 = 5.6163e-04
Loss = 4.2642e-01, PNorm = 68.9416, GNorm = 1.5144, lr_0 = 5.6538e-04
Loss = 3.7227e-01, PNorm = 69.0225, GNorm = 1.4552, lr_0 = 5.6913e-04
Loss = 3.9056e-01, PNorm = 69.1067, GNorm = 1.4104, lr_0 = 5.7288e-04
Loss = 3.9132e-01, PNorm = 69.1905, GNorm = 1.0820, lr_0 = 5.7662e-04
Loss = 4.2552e-01, PNorm = 69.2786, GNorm = 0.7811, lr_0 = 5.8038e-04
Loss = 3.3335e-01, PNorm = 69.3659, GNorm = 1.8887, lr_0 = 5.8413e-04
Loss = 4.5535e-01, PNorm = 69.4632, GNorm = 1.6380, lr_0 = 5.8788e-04
Loss = 3.8139e-01, PNorm = 69.5614, GNorm = 0.9866, lr_0 = 5.9163e-04
Loss = 3.8706e-01, PNorm = 69.6644, GNorm = 0.9462, lr_0 = 5.9538e-04
Loss = 3.2166e-01, PNorm = 69.7534, GNorm = 1.0067, lr_0 = 5.9913e-04
Loss = 3.9394e-01, PNorm = 69.8431, GNorm = 1.1987, lr_0 = 6.0288e-04
Loss = 3.7885e-01, PNorm = 69.9361, GNorm = 1.5577, lr_0 = 6.0663e-04
Loss = 3.8056e-01, PNorm = 70.0329, GNorm = 1.2142, lr_0 = 6.1038e-04
Loss = 3.8932e-01, PNorm = 70.1199, GNorm = 1.3784, lr_0 = 6.1413e-04
Loss = 4.3067e-01, PNorm = 70.2317, GNorm = 1.2818, lr_0 = 6.1788e-04
Loss = 4.2144e-01, PNorm = 70.3494, GNorm = 1.5484, lr_0 = 6.2163e-04
Loss = 3.8160e-01, PNorm = 70.4630, GNorm = 1.8490, lr_0 = 6.2538e-04
Loss = 3.6856e-01, PNorm = 70.5878, GNorm = 1.3544, lr_0 = 6.2913e-04
Loss = 3.9143e-01, PNorm = 70.6968, GNorm = 1.4283, lr_0 = 6.3288e-04
Loss = 3.7351e-01, PNorm = 70.8181, GNorm = 1.6386, lr_0 = 6.3663e-04
Loss = 3.7407e-01, PNorm = 70.9373, GNorm = 1.2579, lr_0 = 6.4038e-04
Loss = 4.2257e-01, PNorm = 71.0511, GNorm = 0.7742, lr_0 = 6.4413e-04
Loss = 4.3743e-01, PNorm = 71.1753, GNorm = 1.5041, lr_0 = 6.4788e-04
Loss = 4.0869e-01, PNorm = 71.2990, GNorm = 2.2099, lr_0 = 6.5163e-04
Loss = 4.3053e-01, PNorm = 71.4087, GNorm = 1.6858, lr_0 = 6.5538e-04
Loss = 3.9116e-01, PNorm = 71.5354, GNorm = 1.1473, lr_0 = 6.5913e-04
Loss = 4.0261e-01, PNorm = 71.6529, GNorm = 1.4570, lr_0 = 6.6288e-04
Loss = 4.6634e-01, PNorm = 71.7734, GNorm = 1.4450, lr_0 = 6.6663e-04
Loss = 4.0108e-01, PNorm = 71.9012, GNorm = 1.2831, lr_0 = 6.7038e-04
Loss = 4.3013e-01, PNorm = 72.0195, GNorm = 1.2177, lr_0 = 6.7413e-04
Loss = 3.4866e-01, PNorm = 72.1407, GNorm = 1.0284, lr_0 = 6.7788e-04
Loss = 4.3664e-01, PNorm = 72.2666, GNorm = 1.3584, lr_0 = 6.8163e-04
Loss = 3.8638e-01, PNorm = 72.3832, GNorm = 1.4913, lr_0 = 6.8538e-04
Loss = 5.1120e-01, PNorm = 72.5180, GNorm = 1.6538, lr_0 = 6.8913e-04
Loss = 4.1891e-01, PNorm = 72.6507, GNorm = 1.1767, lr_0 = 6.9288e-04
Loss = 5.2490e-01, PNorm = 72.7937, GNorm = 1.3731, lr_0 = 6.9663e-04
Loss = 4.5146e-01, PNorm = 72.9271, GNorm = 0.9657, lr_0 = 7.0038e-04
Loss = 4.0611e-01, PNorm = 73.0665, GNorm = 1.4836, lr_0 = 7.0413e-04
Loss = 4.5901e-01, PNorm = 73.1902, GNorm = 1.1283, lr_0 = 7.0788e-04
Loss = 4.1867e-01, PNorm = 73.3168, GNorm = 1.4759, lr_0 = 7.1163e-04
Loss = 4.4034e-01, PNorm = 73.4429, GNorm = 1.0932, lr_0 = 7.1538e-04
Loss = 4.1655e-01, PNorm = 73.5743, GNorm = 1.2630, lr_0 = 7.1913e-04
Loss = 4.0893e-01, PNorm = 73.7079, GNorm = 1.0235, lr_0 = 7.2288e-04
Loss = 4.0834e-01, PNorm = 73.8481, GNorm = 1.5961, lr_0 = 7.2663e-04
Loss = 4.3857e-01, PNorm = 73.9862, GNorm = 1.3364, lr_0 = 7.3038e-04
Loss = 4.1423e-01, PNorm = 74.1203, GNorm = 1.2159, lr_0 = 7.3413e-04
Loss = 4.3881e-01, PNorm = 74.2541, GNorm = 1.1130, lr_0 = 7.3788e-04
Loss = 4.1864e-01, PNorm = 74.3824, GNorm = 0.8556, lr_0 = 7.4163e-04
Loss = 4.3240e-01, PNorm = 74.5158, GNorm = 1.3603, lr_0 = 7.4538e-04
Loss = 4.3436e-01, PNorm = 74.6574, GNorm = 1.2545, lr_0 = 7.4913e-04
Loss = 3.6569e-01, PNorm = 74.7891, GNorm = 1.1033, lr_0 = 7.5288e-04
Loss = 4.2454e-01, PNorm = 74.9318, GNorm = 1.1140, lr_0 = 7.5663e-04
Loss = 3.9763e-01, PNorm = 75.0671, GNorm = 2.1907, lr_0 = 7.6038e-04
Loss = 4.4721e-01, PNorm = 75.2028, GNorm = 1.1672, lr_0 = 7.6413e-04
Loss = 3.9749e-01, PNorm = 75.3460, GNorm = 1.3198, lr_0 = 7.6788e-04
Loss = 4.1365e-01, PNorm = 75.4895, GNorm = 1.1084, lr_0 = 7.7163e-04
Loss = 4.1432e-01, PNorm = 75.6200, GNorm = 1.1475, lr_0 = 7.7538e-04
Loss = 4.6527e-01, PNorm = 75.7539, GNorm = 1.4135, lr_0 = 7.7913e-04
Loss = 4.4775e-01, PNorm = 75.8877, GNorm = 1.1186, lr_0 = 7.8288e-04
Loss = 4.3031e-01, PNorm = 76.0142, GNorm = 1.2390, lr_0 = 7.8663e-04
Loss = 4.0566e-01, PNorm = 76.1503, GNorm = 1.3364, lr_0 = 7.9038e-04
Loss = 4.1779e-01, PNorm = 76.2760, GNorm = 0.9940, lr_0 = 7.9413e-04
Loss = 4.6535e-01, PNorm = 76.4079, GNorm = 1.3327, lr_0 = 7.9788e-04
Loss = 4.0243e-01, PNorm = 76.5413, GNorm = 1.0974, lr_0 = 8.0163e-04
Loss = 3.9785e-01, PNorm = 76.6741, GNorm = 0.9979, lr_0 = 8.0538e-04
Loss = 4.1746e-01, PNorm = 76.8087, GNorm = 1.3499, lr_0 = 8.0913e-04
Loss = 4.3025e-01, PNorm = 76.9472, GNorm = 1.2305, lr_0 = 8.1288e-04
Loss = 4.4781e-01, PNorm = 77.0854, GNorm = 0.9170, lr_0 = 8.1663e-04
Loss = 4.1750e-01, PNorm = 77.2348, GNorm = 1.1823, lr_0 = 8.2038e-04
Loss = 4.4386e-01, PNorm = 77.3819, GNorm = 1.6196, lr_0 = 8.2413e-04
Loss = 4.0584e-01, PNorm = 77.5322, GNorm = 1.4495, lr_0 = 8.2788e-04
Loss = 4.6768e-01, PNorm = 77.6949, GNorm = 1.2435, lr_0 = 8.3163e-04
Loss = 4.5614e-01, PNorm = 77.8670, GNorm = 2.7016, lr_0 = 8.3538e-04
Loss = 4.8212e-01, PNorm = 78.0448, GNorm = 1.4329, lr_0 = 8.3913e-04
Loss = 4.1260e-01, PNorm = 78.2158, GNorm = 1.5516, lr_0 = 8.4288e-04
Loss = 3.6346e-01, PNorm = 78.3634, GNorm = 0.7840, lr_0 = 8.4663e-04
Loss = 3.6857e-01, PNorm = 78.5163, GNorm = 1.1061, lr_0 = 8.5038e-04
Loss = 4.0623e-01, PNorm = 78.6607, GNorm = 1.2586, lr_0 = 8.5413e-04
Loss = 3.9544e-01, PNorm = 78.8152, GNorm = 1.4248, lr_0 = 8.5788e-04
Loss = 3.7593e-01, PNorm = 78.9576, GNorm = 0.9756, lr_0 = 8.6163e-04
Loss = 4.3196e-01, PNorm = 79.1003, GNorm = 1.4550, lr_0 = 8.6538e-04
Loss = 4.2470e-01, PNorm = 79.2459, GNorm = 1.0741, lr_0 = 8.6913e-04
Loss = 4.3014e-01, PNorm = 79.3868, GNorm = 1.0911, lr_0 = 8.7288e-04
Loss = 5.7921e-01, PNorm = 79.5330, GNorm = 1.2708, lr_0 = 8.7663e-04
Loss = 4.7392e-01, PNorm = 79.7047, GNorm = 1.3958, lr_0 = 8.8038e-04
Loss = 4.0941e-01, PNorm = 79.8580, GNorm = 1.3736, lr_0 = 8.8413e-04
Loss = 4.3210e-01, PNorm = 80.0171, GNorm = 0.9980, lr_0 = 8.8788e-04
Loss = 4.2799e-01, PNorm = 80.1736, GNorm = 1.2071, lr_0 = 8.9163e-04
Loss = 4.6603e-01, PNorm = 80.3275, GNorm = 2.4490, lr_0 = 8.9538e-04
Loss = 3.9602e-01, PNorm = 80.4864, GNorm = 1.2154, lr_0 = 8.9913e-04
Loss = 3.9086e-01, PNorm = 80.6394, GNorm = 0.9135, lr_0 = 9.0288e-04
Loss = 4.1611e-01, PNorm = 80.7834, GNorm = 0.9862, lr_0 = 9.0663e-04
Loss = 4.5479e-01, PNorm = 80.9243, GNorm = 1.0127, lr_0 = 9.1038e-04
Loss = 4.4165e-01, PNorm = 81.0665, GNorm = 1.1492, lr_0 = 9.1413e-04
Loss = 4.6573e-01, PNorm = 81.2077, GNorm = 0.8975, lr_0 = 9.1788e-04
Loss = 4.3277e-01, PNorm = 81.3535, GNorm = 1.2665, lr_0 = 9.2163e-04
Loss = 4.0786e-01, PNorm = 81.4933, GNorm = 1.0405, lr_0 = 9.2538e-04
Loss = 4.5588e-01, PNorm = 81.6453, GNorm = 0.9896, lr_0 = 9.2913e-04
Loss = 4.4089e-01, PNorm = 81.7971, GNorm = 1.2498, lr_0 = 9.3288e-04
Loss = 4.6489e-01, PNorm = 81.9656, GNorm = 1.3899, lr_0 = 9.3663e-04
Loss = 4.3618e-01, PNorm = 82.1299, GNorm = 0.8727, lr_0 = 9.4038e-04
Loss = 4.0102e-01, PNorm = 82.2862, GNorm = 1.0152, lr_0 = 9.4413e-04
Loss = 4.0025e-01, PNorm = 82.4408, GNorm = 1.2503, lr_0 = 9.4788e-04
Loss = 4.5609e-01, PNorm = 82.5967, GNorm = 1.2100, lr_0 = 9.5163e-04
Loss = 4.2622e-01, PNorm = 82.7738, GNorm = 1.1658, lr_0 = 9.5538e-04
Loss = 4.4430e-01, PNorm = 82.9330, GNorm = 1.7944, lr_0 = 9.5913e-04
Loss = 4.0794e-01, PNorm = 83.0908, GNorm = 1.4769, lr_0 = 9.6288e-04
Loss = 4.8743e-01, PNorm = 83.2582, GNorm = 1.6436, lr_0 = 9.6663e-04
Loss = 3.6965e-01, PNorm = 83.4092, GNorm = 1.0770, lr_0 = 9.7038e-04
Loss = 4.5283e-01, PNorm = 83.5582, GNorm = 1.1020, lr_0 = 9.7413e-04
Loss = 3.8427e-01, PNorm = 83.7176, GNorm = 1.1428, lr_0 = 9.7788e-04
Loss = 4.5981e-01, PNorm = 83.8855, GNorm = 1.2244, lr_0 = 9.8163e-04
Loss = 4.1533e-01, PNorm = 84.0543, GNorm = 1.5188, lr_0 = 9.8537e-04
Loss = 4.1024e-01, PNorm = 84.2196, GNorm = 1.0083, lr_0 = 9.8912e-04
Loss = 3.8880e-01, PNorm = 84.3911, GNorm = 1.1082, lr_0 = 9.9288e-04
Loss = 3.5403e-01, PNorm = 84.5390, GNorm = 0.9133, lr_0 = 9.9663e-04
Loss = 4.4735e-01, PNorm = 84.7036, GNorm = 1.0640, lr_0 = 9.9993e-04
Validation mae = 0.126250
Epoch 2
Loss = 2.8934e-01, PNorm = 84.8614, GNorm = 0.8756, lr_0 = 9.9925e-04
Loss = 2.9011e-01, PNorm = 85.0105, GNorm = 0.8740, lr_0 = 9.9856e-04
Loss = 3.3330e-01, PNorm = 85.1696, GNorm = 0.6912, lr_0 = 9.9788e-04
Loss = 2.5111e-01, PNorm = 85.3280, GNorm = 0.9250, lr_0 = 9.9719e-04
Loss = 3.0296e-01, PNorm = 85.4764, GNorm = 1.1576, lr_0 = 9.9651e-04
Loss = 3.1016e-01, PNorm = 85.6368, GNorm = 2.0166, lr_0 = 9.9583e-04
Loss = 2.8046e-01, PNorm = 85.8060, GNorm = 0.8829, lr_0 = 9.9515e-04
Loss = 2.8831e-01, PNorm = 85.9726, GNorm = 0.7431, lr_0 = 9.9446e-04
Loss = 2.6709e-01, PNorm = 86.1312, GNorm = 0.7929, lr_0 = 9.9378e-04
Loss = 2.7280e-01, PNorm = 86.2925, GNorm = 0.9243, lr_0 = 9.9310e-04
Loss = 2.8728e-01, PNorm = 86.4553, GNorm = 0.9035, lr_0 = 9.9242e-04
Loss = 2.7485e-01, PNorm = 86.6218, GNorm = 0.7968, lr_0 = 9.9174e-04
Loss = 2.6791e-01, PNorm = 86.7838, GNorm = 0.8800, lr_0 = 9.9106e-04
Loss = 2.8042e-01, PNorm = 86.9562, GNorm = 1.1620, lr_0 = 9.9038e-04
Loss = 2.5761e-01, PNorm = 87.1378, GNorm = 0.7842, lr_0 = 9.8971e-04
Loss = 3.6100e-01, PNorm = 87.3186, GNorm = 1.2874, lr_0 = 9.8903e-04
Loss = 2.7579e-01, PNorm = 87.5017, GNorm = 0.7909, lr_0 = 9.8835e-04
Loss = 2.7515e-01, PNorm = 87.6907, GNorm = 0.8258, lr_0 = 9.8767e-04
Loss = 2.6717e-01, PNorm = 87.8696, GNorm = 0.6990, lr_0 = 9.8700e-04
Loss = 2.7077e-01, PNorm = 88.0341, GNorm = 1.1954, lr_0 = 9.8632e-04
Loss = 2.6591e-01, PNorm = 88.2053, GNorm = 1.1844, lr_0 = 9.8564e-04
Loss = 2.6780e-01, PNorm = 88.3740, GNorm = 1.2737, lr_0 = 9.8497e-04
Loss = 2.6853e-01, PNorm = 88.5434, GNorm = 0.9137, lr_0 = 9.8429e-04
Loss = 2.9133e-01, PNorm = 88.7159, GNorm = 1.4932, lr_0 = 9.8362e-04
Loss = 3.1275e-01, PNorm = 88.8798, GNorm = 1.2356, lr_0 = 9.8295e-04
Loss = 2.9843e-01, PNorm = 89.0648, GNorm = 1.3326, lr_0 = 9.8227e-04
Loss = 3.0816e-01, PNorm = 89.2413, GNorm = 0.8035, lr_0 = 9.8160e-04
Loss = 2.6708e-01, PNorm = 89.4202, GNorm = 0.8392, lr_0 = 9.8093e-04
Loss = 2.9588e-01, PNorm = 89.5817, GNorm = 0.9611, lr_0 = 9.8026e-04
Loss = 3.7333e-01, PNorm = 89.7480, GNorm = 1.0973, lr_0 = 9.7958e-04
Loss = 3.3385e-01, PNorm = 89.9429, GNorm = 1.7239, lr_0 = 9.7891e-04
Loss = 3.0573e-01, PNorm = 90.1255, GNorm = 2.2647, lr_0 = 9.7824e-04
Loss = 3.3999e-01, PNorm = 90.3217, GNorm = 1.2356, lr_0 = 9.7757e-04
Loss = 2.9797e-01, PNorm = 90.5192, GNorm = 0.9671, lr_0 = 9.7690e-04
Loss = 2.9585e-01, PNorm = 90.6880, GNorm = 0.9220, lr_0 = 9.7623e-04
Loss = 2.7661e-01, PNorm = 90.8627, GNorm = 1.2317, lr_0 = 9.7556e-04
Loss = 2.8832e-01, PNorm = 91.0207, GNorm = 0.9277, lr_0 = 9.7490e-04
Loss = 3.0130e-01, PNorm = 91.1657, GNorm = 1.0546, lr_0 = 9.7423e-04
Loss = 3.4078e-01, PNorm = 91.3424, GNorm = 1.9496, lr_0 = 9.7356e-04
Loss = 3.2643e-01, PNorm = 91.5040, GNorm = 1.9767, lr_0 = 9.7289e-04
Loss = 2.8742e-01, PNorm = 91.6932, GNorm = 0.7826, lr_0 = 9.7223e-04
Loss = 3.2463e-01, PNorm = 91.8519, GNorm = 0.9322, lr_0 = 9.7156e-04
Loss = 3.7138e-01, PNorm = 92.0356, GNorm = 1.5896, lr_0 = 9.7090e-04
Loss = 3.1836e-01, PNorm = 92.2156, GNorm = 1.1744, lr_0 = 9.7023e-04
Loss = 3.4674e-01, PNorm = 92.3907, GNorm = 1.2101, lr_0 = 9.6957e-04
Loss = 2.9386e-01, PNorm = 92.5648, GNorm = 1.1875, lr_0 = 9.6890e-04
Loss = 3.0159e-01, PNorm = 92.7172, GNorm = 0.7502, lr_0 = 9.6824e-04
Loss = 3.6013e-01, PNorm = 92.8827, GNorm = 1.0564, lr_0 = 9.6757e-04
Loss = 3.0369e-01, PNorm = 93.0506, GNorm = 0.7640, lr_0 = 9.6691e-04
Loss = 3.1173e-01, PNorm = 93.2339, GNorm = 0.9675, lr_0 = 9.6625e-04
Loss = 3.3021e-01, PNorm = 93.4020, GNorm = 1.0472, lr_0 = 9.6559e-04
Loss = 2.9616e-01, PNorm = 93.5767, GNorm = 1.2047, lr_0 = 9.6493e-04
Loss = 3.0086e-01, PNorm = 93.7372, GNorm = 1.5358, lr_0 = 9.6427e-04
Loss = 3.4170e-01, PNorm = 93.9130, GNorm = 1.1075, lr_0 = 9.6360e-04
Loss = 3.2703e-01, PNorm = 94.0973, GNorm = 1.2057, lr_0 = 9.6294e-04
Loss = 3.2828e-01, PNorm = 94.2797, GNorm = 1.4517, lr_0 = 9.6228e-04
Loss = 3.3173e-01, PNorm = 94.4533, GNorm = 1.1715, lr_0 = 9.6163e-04
Loss = 3.1416e-01, PNorm = 94.6163, GNorm = 1.0354, lr_0 = 9.6097e-04
Loss = 3.0159e-01, PNorm = 94.7726, GNorm = 0.7435, lr_0 = 9.6031e-04
Loss = 2.6992e-01, PNorm = 94.9216, GNorm = 1.1743, lr_0 = 9.5965e-04
Loss = 3.1438e-01, PNorm = 95.0658, GNorm = 1.0668, lr_0 = 9.5899e-04
Loss = 3.2524e-01, PNorm = 95.2344, GNorm = 0.9441, lr_0 = 9.5834e-04
Loss = 3.2797e-01, PNorm = 95.3927, GNorm = 0.9782, lr_0 = 9.5768e-04
Loss = 2.9086e-01, PNorm = 95.5547, GNorm = 0.8564, lr_0 = 9.5702e-04
Loss = 2.8076e-01, PNorm = 95.6972, GNorm = 1.0373, lr_0 = 9.5637e-04
Loss = 3.0820e-01, PNorm = 95.8376, GNorm = 1.7827, lr_0 = 9.5571e-04
Loss = 3.1898e-01, PNorm = 95.9887, GNorm = 1.0974, lr_0 = 9.5506e-04
Loss = 2.9388e-01, PNorm = 96.1422, GNorm = 1.1457, lr_0 = 9.5440e-04
Loss = 2.9731e-01, PNorm = 96.2878, GNorm = 1.0024, lr_0 = 9.5375e-04
Loss = 3.4999e-01, PNorm = 96.4462, GNorm = 0.9084, lr_0 = 9.5310e-04
Loss = 2.4690e-01, PNorm = 96.6152, GNorm = 1.0625, lr_0 = 9.5244e-04
Loss = 3.1502e-01, PNorm = 96.7700, GNorm = 1.2790, lr_0 = 9.5179e-04
Loss = 2.8862e-01, PNorm = 96.9410, GNorm = 1.2524, lr_0 = 9.5114e-04
Loss = 3.2251e-01, PNorm = 97.1040, GNorm = 1.6190, lr_0 = 9.5049e-04
Loss = 3.1742e-01, PNorm = 97.2615, GNorm = 0.9366, lr_0 = 9.4984e-04
Loss = 3.3844e-01, PNorm = 97.4331, GNorm = 1.1303, lr_0 = 9.4919e-04
Loss = 3.0944e-01, PNorm = 97.5984, GNorm = 1.1735, lr_0 = 9.4854e-04
Loss = 3.1931e-01, PNorm = 97.7543, GNorm = 1.6246, lr_0 = 9.4789e-04
Loss = 2.9366e-01, PNorm = 97.9141, GNorm = 0.8416, lr_0 = 9.4724e-04
Loss = 3.7783e-01, PNorm = 98.0719, GNorm = 1.3438, lr_0 = 9.4659e-04
Loss = 3.0314e-01, PNorm = 98.2332, GNorm = 1.1471, lr_0 = 9.4594e-04
Loss = 3.5219e-01, PNorm = 98.3889, GNorm = 1.5494, lr_0 = 9.4529e-04
Loss = 4.1935e-01, PNorm = 98.5467, GNorm = 1.0390, lr_0 = 9.4464e-04
Loss = 2.9991e-01, PNorm = 98.7053, GNorm = 1.2516, lr_0 = 9.4400e-04
Loss = 3.3759e-01, PNorm = 98.8649, GNorm = 1.6844, lr_0 = 9.4335e-04
Loss = 3.2967e-01, PNorm = 99.0312, GNorm = 0.9851, lr_0 = 9.4270e-04
Loss = 3.1056e-01, PNorm = 99.1847, GNorm = 0.9160, lr_0 = 9.4206e-04
Loss = 3.3256e-01, PNorm = 99.3282, GNorm = 1.1368, lr_0 = 9.4141e-04
Loss = 3.1317e-01, PNorm = 99.4807, GNorm = 1.0940, lr_0 = 9.4077e-04
Loss = 3.5711e-01, PNorm = 99.6374, GNorm = 1.3514, lr_0 = 9.4012e-04
Loss = 2.9871e-01, PNorm = 99.7961, GNorm = 1.2800, lr_0 = 9.3948e-04
Loss = 3.8368e-01, PNorm = 99.9681, GNorm = 1.1148, lr_0 = 9.3884e-04
Loss = 3.3591e-01, PNorm = 100.1618, GNorm = 0.9622, lr_0 = 9.3819e-04
Loss = 3.5851e-01, PNorm = 100.3301, GNorm = 0.9453, lr_0 = 9.3755e-04
Loss = 3.6754e-01, PNorm = 100.4882, GNorm = 1.3610, lr_0 = 9.3691e-04
Loss = 3.1676e-01, PNorm = 100.6482, GNorm = 1.3274, lr_0 = 9.3627e-04
Loss = 3.1992e-01, PNorm = 100.8010, GNorm = 1.3485, lr_0 = 9.3562e-04
Loss = 3.5806e-01, PNorm = 100.9546, GNorm = 1.8685, lr_0 = 9.3498e-04
Loss = 3.2347e-01, PNorm = 101.1022, GNorm = 0.7946, lr_0 = 9.3434e-04
Loss = 3.2333e-01, PNorm = 101.2500, GNorm = 1.0147, lr_0 = 9.3370e-04
Loss = 3.5495e-01, PNorm = 101.4027, GNorm = 1.6564, lr_0 = 9.3306e-04
Loss = 3.2792e-01, PNorm = 101.5488, GNorm = 1.4998, lr_0 = 9.3242e-04
Loss = 2.9400e-01, PNorm = 101.7088, GNorm = 0.8771, lr_0 = 9.3178e-04
Loss = 3.0390e-01, PNorm = 101.8660, GNorm = 0.9646, lr_0 = 9.3115e-04
Loss = 3.1767e-01, PNorm = 102.0089, GNorm = 1.3516, lr_0 = 9.3051e-04
Loss = 3.3262e-01, PNorm = 102.1624, GNorm = 0.8231, lr_0 = 9.2987e-04
Loss = 3.5195e-01, PNorm = 102.3235, GNorm = 0.9140, lr_0 = 9.2923e-04
Loss = 2.8306e-01, PNorm = 102.4837, GNorm = 1.1409, lr_0 = 9.2860e-04
Loss = 2.8375e-01, PNorm = 102.6349, GNorm = 0.9104, lr_0 = 9.2796e-04
Loss = 3.3239e-01, PNorm = 102.7764, GNorm = 1.2451, lr_0 = 9.2733e-04
Loss = 3.8726e-01, PNorm = 102.9389, GNorm = 1.8495, lr_0 = 9.2669e-04
Loss = 3.3388e-01, PNorm = 103.0942, GNorm = 1.2910, lr_0 = 9.2606e-04
Loss = 3.5405e-01, PNorm = 103.2631, GNorm = 1.1341, lr_0 = 9.2542e-04
Loss = 2.8829e-01, PNorm = 103.4073, GNorm = 1.2320, lr_0 = 9.2479e-04
Loss = 3.1919e-01, PNorm = 103.5417, GNorm = 0.8464, lr_0 = 9.2415e-04
Loss = 3.7938e-01, PNorm = 103.6910, GNorm = 1.5812, lr_0 = 9.2352e-04
Loss = 3.7687e-01, PNorm = 103.8321, GNorm = 1.0807, lr_0 = 9.2289e-04
Loss = 3.2880e-01, PNorm = 103.9726, GNorm = 0.8561, lr_0 = 9.2226e-04
Loss = 3.9752e-01, PNorm = 104.1119, GNorm = 0.7320, lr_0 = 9.2162e-04
Loss = 3.4276e-01, PNorm = 104.2587, GNorm = 1.0562, lr_0 = 9.2099e-04
Validation mae = 0.124842
Epoch 3
Loss = 1.9387e-01, PNorm = 104.4033, GNorm = 0.8232, lr_0 = 9.2036e-04
Loss = 1.6513e-01, PNorm = 104.5124, GNorm = 0.6593, lr_0 = 9.1973e-04
Loss = 1.8874e-01, PNorm = 104.6262, GNorm = 0.8261, lr_0 = 9.1910e-04
Loss = 1.7247e-01, PNorm = 104.7118, GNorm = 1.2282, lr_0 = 9.1847e-04
Loss = 1.7110e-01, PNorm = 104.8123, GNorm = 0.7520, lr_0 = 9.1784e-04
Loss = 1.6072e-01, PNorm = 104.9055, GNorm = 0.8555, lr_0 = 9.1721e-04
Loss = 1.8227e-01, PNorm = 104.9999, GNorm = 0.6617, lr_0 = 9.1658e-04
Loss = 1.7931e-01, PNorm = 105.0913, GNorm = 0.8037, lr_0 = 9.1596e-04
Loss = 1.8127e-01, PNorm = 105.1878, GNorm = 0.7618, lr_0 = 9.1533e-04
Loss = 1.5927e-01, PNorm = 105.2867, GNorm = 0.9203, lr_0 = 9.1470e-04
Loss = 1.8792e-01, PNorm = 105.3835, GNorm = 0.7903, lr_0 = 9.1408e-04
Loss = 1.9927e-01, PNorm = 105.4920, GNorm = 1.0698, lr_0 = 9.1345e-04
Loss = 1.7182e-01, PNorm = 105.5910, GNorm = 1.6277, lr_0 = 9.1282e-04
Loss = 1.8885e-01, PNorm = 105.6839, GNorm = 0.7397, lr_0 = 9.1220e-04
Loss = 1.7535e-01, PNorm = 105.7838, GNorm = 0.6602, lr_0 = 9.1157e-04
Loss = 1.7946e-01, PNorm = 105.8880, GNorm = 0.9223, lr_0 = 9.1095e-04
Loss = 1.9873e-01, PNorm = 105.9926, GNorm = 0.9952, lr_0 = 9.1032e-04
Loss = 1.7563e-01, PNorm = 106.1035, GNorm = 0.8938, lr_0 = 9.0970e-04
Loss = 1.7055e-01, PNorm = 106.2149, GNorm = 1.0940, lr_0 = 9.0908e-04
Loss = 1.7562e-01, PNorm = 106.3163, GNorm = 1.0379, lr_0 = 9.0846e-04
Loss = 1.7323e-01, PNorm = 106.4226, GNorm = 0.8632, lr_0 = 9.0783e-04
Loss = 1.7899e-01, PNorm = 106.5338, GNorm = 0.7706, lr_0 = 9.0721e-04
Loss = 1.9198e-01, PNorm = 106.6449, GNorm = 0.8220, lr_0 = 9.0659e-04
Loss = 1.6090e-01, PNorm = 106.7583, GNorm = 1.0833, lr_0 = 9.0597e-04
Loss = 1.6317e-01, PNorm = 106.8713, GNorm = 0.7743, lr_0 = 9.0535e-04
Loss = 1.6571e-01, PNorm = 106.9715, GNorm = 0.7849, lr_0 = 9.0473e-04
Loss = 1.7166e-01, PNorm = 107.0694, GNorm = 0.9579, lr_0 = 9.0411e-04
Loss = 1.7037e-01, PNorm = 107.1719, GNorm = 0.5319, lr_0 = 9.0349e-04
Loss = 2.0708e-01, PNorm = 107.2695, GNorm = 0.8143, lr_0 = 9.0287e-04
Loss = 1.8825e-01, PNorm = 107.3846, GNorm = 0.6251, lr_0 = 9.0225e-04
Loss = 1.9782e-01, PNorm = 107.5020, GNorm = 0.6605, lr_0 = 9.0163e-04
Loss = 1.6867e-01, PNorm = 107.6242, GNorm = 1.0856, lr_0 = 9.0102e-04
Loss = 1.9847e-01, PNorm = 107.7388, GNorm = 0.8941, lr_0 = 9.0040e-04
Loss = 1.7525e-01, PNorm = 107.8570, GNorm = 0.7150, lr_0 = 8.9978e-04
Loss = 2.1120e-01, PNorm = 107.9883, GNorm = 0.9288, lr_0 = 8.9916e-04
Loss = 1.9956e-01, PNorm = 108.1171, GNorm = 0.7943, lr_0 = 8.9855e-04
Loss = 1.8228e-01, PNorm = 108.2408, GNorm = 0.9242, lr_0 = 8.9793e-04
Loss = 1.9530e-01, PNorm = 108.3636, GNorm = 1.4315, lr_0 = 8.9732e-04
Loss = 1.6961e-01, PNorm = 108.4786, GNorm = 0.6214, lr_0 = 8.9670e-04
Loss = 1.7770e-01, PNorm = 108.5958, GNorm = 0.7158, lr_0 = 8.9609e-04
Loss = 2.1110e-01, PNorm = 108.7158, GNorm = 0.8435, lr_0 = 8.9548e-04
Loss = 1.5764e-01, PNorm = 108.8367, GNorm = 0.5853, lr_0 = 8.9486e-04
Loss = 2.3775e-01, PNorm = 108.9505, GNorm = 0.7738, lr_0 = 8.9425e-04
Loss = 1.9056e-01, PNorm = 109.0695, GNorm = 0.7499, lr_0 = 8.9364e-04
Loss = 1.8478e-01, PNorm = 109.1953, GNorm = 0.7781, lr_0 = 8.9302e-04
Loss = 1.8359e-01, PNorm = 109.3149, GNorm = 0.8305, lr_0 = 8.9241e-04
Loss = 1.8266e-01, PNorm = 109.4357, GNorm = 0.6764, lr_0 = 8.9180e-04
Loss = 1.9595e-01, PNorm = 109.5503, GNorm = 1.2717, lr_0 = 8.9119e-04
Loss = 2.3318e-01, PNorm = 109.6743, GNorm = 0.8392, lr_0 = 8.9058e-04
Loss = 1.8289e-01, PNorm = 109.7999, GNorm = 0.9970, lr_0 = 8.8997e-04
Loss = 1.7937e-01, PNorm = 109.9263, GNorm = 1.2928, lr_0 = 8.8936e-04
Loss = 1.9787e-01, PNorm = 110.0500, GNorm = 0.7123, lr_0 = 8.8875e-04
Loss = 1.7363e-01, PNorm = 110.1647, GNorm = 0.9854, lr_0 = 8.8814e-04
Loss = 2.2057e-01, PNorm = 110.2887, GNorm = 0.8617, lr_0 = 8.8753e-04
Loss = 1.9746e-01, PNorm = 110.4047, GNorm = 0.8763, lr_0 = 8.8693e-04
Loss = 2.0944e-01, PNorm = 110.5328, GNorm = 0.8715, lr_0 = 8.8632e-04
Loss = 1.9111e-01, PNorm = 110.6497, GNorm = 0.7372, lr_0 = 8.8571e-04
Loss = 2.3174e-01, PNorm = 110.7761, GNorm = 0.7898, lr_0 = 8.8510e-04
Loss = 1.6842e-01, PNorm = 110.8995, GNorm = 0.9342, lr_0 = 8.8450e-04
Loss = 2.1121e-01, PNorm = 111.0189, GNorm = 0.8985, lr_0 = 8.8389e-04
Loss = 1.9818e-01, PNorm = 111.1509, GNorm = 0.6845, lr_0 = 8.8329e-04
Loss = 2.0446e-01, PNorm = 111.2833, GNorm = 0.6498, lr_0 = 8.8268e-04
Loss = 2.1490e-01, PNorm = 111.4031, GNorm = 0.8159, lr_0 = 8.8208e-04
Loss = 2.1166e-01, PNorm = 111.5194, GNorm = 1.3521, lr_0 = 8.8147e-04
Loss = 2.1795e-01, PNorm = 111.6492, GNorm = 0.9792, lr_0 = 8.8087e-04
Loss = 2.1516e-01, PNorm = 111.7833, GNorm = 1.0256, lr_0 = 8.8026e-04
Loss = 2.2081e-01, PNorm = 111.9165, GNorm = 0.8647, lr_0 = 8.7966e-04
Loss = 2.1453e-01, PNorm = 112.0596, GNorm = 0.8736, lr_0 = 8.7906e-04
Loss = 1.9841e-01, PNorm = 112.1851, GNorm = 0.9309, lr_0 = 8.7846e-04
Loss = 2.1407e-01, PNorm = 112.3168, GNorm = 0.9326, lr_0 = 8.7785e-04
Loss = 2.0394e-01, PNorm = 112.4449, GNorm = 0.9423, lr_0 = 8.7725e-04
Loss = 2.0664e-01, PNorm = 112.5812, GNorm = 0.8305, lr_0 = 8.7665e-04
Loss = 1.9390e-01, PNorm = 112.7076, GNorm = 1.0589, lr_0 = 8.7605e-04
Loss = 2.2192e-01, PNorm = 112.8399, GNorm = 1.3233, lr_0 = 8.7545e-04
Loss = 1.8327e-01, PNorm = 112.9621, GNorm = 0.8839, lr_0 = 8.7485e-04
Loss = 2.1389e-01, PNorm = 113.0792, GNorm = 0.7872, lr_0 = 8.7425e-04
Loss = 2.0290e-01, PNorm = 113.1968, GNorm = 0.6681, lr_0 = 8.7365e-04
Loss = 2.1524e-01, PNorm = 113.3329, GNorm = 0.6600, lr_0 = 8.7306e-04
Loss = 2.1058e-01, PNorm = 113.4517, GNorm = 1.2632, lr_0 = 8.7246e-04
Loss = 2.0618e-01, PNorm = 113.5773, GNorm = 0.9356, lr_0 = 8.7186e-04
Loss = 1.8968e-01, PNorm = 113.6981, GNorm = 0.7191, lr_0 = 8.7126e-04
Loss = 2.6403e-01, PNorm = 113.8190, GNorm = 1.3778, lr_0 = 8.7067e-04
Loss = 2.3881e-01, PNorm = 113.9583, GNorm = 1.2216, lr_0 = 8.7007e-04
Loss = 1.9361e-01, PNorm = 114.0936, GNorm = 0.8189, lr_0 = 8.6947e-04
Loss = 2.2886e-01, PNorm = 114.2264, GNorm = 1.7572, lr_0 = 8.6888e-04
Loss = 2.5411e-01, PNorm = 114.3586, GNorm = 0.9206, lr_0 = 8.6828e-04
Loss = 2.1415e-01, PNorm = 114.5006, GNorm = 0.9683, lr_0 = 8.6769e-04
Loss = 2.1587e-01, PNorm = 114.6298, GNorm = 0.7635, lr_0 = 8.6709e-04
Loss = 2.1742e-01, PNorm = 114.7629, GNorm = 1.3700, lr_0 = 8.6650e-04
Loss = 1.9486e-01, PNorm = 114.8893, GNorm = 0.5421, lr_0 = 8.6590e-04
Loss = 2.1126e-01, PNorm = 115.0176, GNorm = 0.5441, lr_0 = 8.6531e-04
Loss = 2.1778e-01, PNorm = 115.1481, GNorm = 0.9900, lr_0 = 8.6472e-04
Loss = 2.1308e-01, PNorm = 115.2796, GNorm = 0.8494, lr_0 = 8.6413e-04
Loss = 2.3382e-01, PNorm = 115.4172, GNorm = 1.0706, lr_0 = 8.6353e-04
Loss = 2.5604e-01, PNorm = 115.5684, GNorm = 0.9502, lr_0 = 8.6294e-04
Loss = 2.1389e-01, PNorm = 115.7015, GNorm = 1.0157, lr_0 = 8.6235e-04
Loss = 2.0209e-01, PNorm = 115.8284, GNorm = 0.7957, lr_0 = 8.6176e-04
Loss = 2.3358e-01, PNorm = 115.9550, GNorm = 0.8619, lr_0 = 8.6117e-04
Loss = 2.6530e-01, PNorm = 116.0724, GNorm = 1.2194, lr_0 = 8.6058e-04
Loss = 2.3924e-01, PNorm = 116.2076, GNorm = 0.8400, lr_0 = 8.5999e-04
Loss = 2.2236e-01, PNorm = 116.3491, GNorm = 0.7776, lr_0 = 8.5940e-04
Loss = 2.1960e-01, PNorm = 116.4705, GNorm = 1.1471, lr_0 = 8.5881e-04
Loss = 2.1269e-01, PNorm = 116.5988, GNorm = 1.3485, lr_0 = 8.5823e-04
Loss = 2.1506e-01, PNorm = 116.7168, GNorm = 0.9604, lr_0 = 8.5764e-04
Loss = 2.3521e-01, PNorm = 116.8381, GNorm = 0.9036, lr_0 = 8.5705e-04
Loss = 1.9893e-01, PNorm = 116.9693, GNorm = 0.7499, lr_0 = 8.5646e-04
Loss = 2.2740e-01, PNorm = 117.0966, GNorm = 1.2087, lr_0 = 8.5588e-04
Loss = 2.0745e-01, PNorm = 117.2273, GNorm = 0.9884, lr_0 = 8.5529e-04
Loss = 1.9907e-01, PNorm = 117.3536, GNorm = 0.6410, lr_0 = 8.5470e-04
Loss = 2.0204e-01, PNorm = 117.4796, GNorm = 0.6882, lr_0 = 8.5412e-04
Loss = 2.1205e-01, PNorm = 117.5959, GNorm = 1.2767, lr_0 = 8.5353e-04
Loss = 2.1002e-01, PNorm = 117.7105, GNorm = 0.7664, lr_0 = 8.5295e-04
Loss = 2.2159e-01, PNorm = 117.8331, GNorm = 1.4726, lr_0 = 8.5236e-04
Loss = 2.0534e-01, PNorm = 117.9562, GNorm = 0.9090, lr_0 = 8.5178e-04
Loss = 2.1281e-01, PNorm = 118.0966, GNorm = 0.9019, lr_0 = 8.5120e-04
Loss = 2.0767e-01, PNorm = 118.2296, GNorm = 0.8758, lr_0 = 8.5061e-04
Loss = 2.4913e-01, PNorm = 118.3654, GNorm = 1.6583, lr_0 = 8.5003e-04
Loss = 2.3354e-01, PNorm = 118.5037, GNorm = 1.0522, lr_0 = 8.4945e-04
Loss = 2.1369e-01, PNorm = 118.6299, GNorm = 1.1212, lr_0 = 8.4887e-04
Loss = 2.3027e-01, PNorm = 118.7575, GNorm = 0.7835, lr_0 = 8.4828e-04
Validation mae = 0.123967
Epoch 4
Loss = 1.2169e-01, PNorm = 118.8713, GNorm = 0.7896, lr_0 = 8.4770e-04
Loss = 1.2763e-01, PNorm = 118.9663, GNorm = 0.8947, lr_0 = 8.4712e-04
Loss = 1.1328e-01, PNorm = 119.0368, GNorm = 0.8197, lr_0 = 8.4654e-04
Loss = 1.2570e-01, PNorm = 119.1121, GNorm = 0.7763, lr_0 = 8.4596e-04
Loss = 1.0863e-01, PNorm = 119.1818, GNorm = 0.6522, lr_0 = 8.4538e-04
Loss = 1.2798e-01, PNorm = 119.2559, GNorm = 0.6232, lr_0 = 8.4480e-04
Loss = 1.1082e-01, PNorm = 119.3218, GNorm = 0.4786, lr_0 = 8.4423e-04
Loss = 1.1372e-01, PNorm = 119.3908, GNorm = 0.6867, lr_0 = 8.4365e-04
Loss = 1.1060e-01, PNorm = 119.4580, GNorm = 0.6877, lr_0 = 8.4307e-04
Loss = 1.1578e-01, PNorm = 119.5238, GNorm = 0.7078, lr_0 = 8.4249e-04
Loss = 1.0874e-01, PNorm = 119.5856, GNorm = 0.9003, lr_0 = 8.4191e-04
Loss = 1.2347e-01, PNorm = 119.6501, GNorm = 0.5726, lr_0 = 8.4134e-04
Loss = 1.0536e-01, PNorm = 119.7202, GNorm = 0.5318, lr_0 = 8.4076e-04
Loss = 1.1499e-01, PNorm = 119.7783, GNorm = 0.8601, lr_0 = 8.4019e-04
Loss = 1.0172e-01, PNorm = 119.8501, GNorm = 0.5561, lr_0 = 8.3961e-04
Loss = 1.0925e-01, PNorm = 119.9159, GNorm = 0.8983, lr_0 = 8.3903e-04
Loss = 1.0754e-01, PNorm = 119.9838, GNorm = 1.1142, lr_0 = 8.3846e-04
Loss = 1.1350e-01, PNorm = 120.0516, GNorm = 0.5276, lr_0 = 8.3789e-04
Loss = 1.1831e-01, PNorm = 120.1172, GNorm = 0.6936, lr_0 = 8.3731e-04
Loss = 1.2185e-01, PNorm = 120.1960, GNorm = 0.5721, lr_0 = 8.3674e-04
Loss = 1.1568e-01, PNorm = 120.2665, GNorm = 0.6817, lr_0 = 8.3616e-04
Loss = 1.1641e-01, PNorm = 120.3441, GNorm = 0.9647, lr_0 = 8.3559e-04
Loss = 1.2140e-01, PNorm = 120.4212, GNorm = 0.5818, lr_0 = 8.3502e-04
Loss = 1.0379e-01, PNorm = 120.4911, GNorm = 0.8405, lr_0 = 8.3445e-04
Loss = 1.2700e-01, PNorm = 120.5568, GNorm = 0.5122, lr_0 = 8.3388e-04
Loss = 1.1096e-01, PNorm = 120.6286, GNorm = 0.6204, lr_0 = 8.3330e-04
Loss = 1.1282e-01, PNorm = 120.7069, GNorm = 0.7519, lr_0 = 8.3273e-04
Loss = 1.2465e-01, PNorm = 120.7711, GNorm = 0.6845, lr_0 = 8.3216e-04
Loss = 1.2184e-01, PNorm = 120.8522, GNorm = 1.1704, lr_0 = 8.3159e-04
Loss = 1.1207e-01, PNorm = 120.9199, GNorm = 0.5144, lr_0 = 8.3102e-04
Loss = 9.5621e-02, PNorm = 120.9971, GNorm = 0.5855, lr_0 = 8.3045e-04
Loss = 1.2351e-01, PNorm = 121.0714, GNorm = 0.8977, lr_0 = 8.2988e-04
Loss = 1.3206e-01, PNorm = 121.1428, GNorm = 1.8280, lr_0 = 8.2932e-04
Loss = 1.1599e-01, PNorm = 121.2250, GNorm = 0.5968, lr_0 = 8.2875e-04
Loss = 1.0332e-01, PNorm = 121.3010, GNorm = 0.5533, lr_0 = 8.2818e-04
Loss = 1.0641e-01, PNorm = 121.3819, GNorm = 0.5460, lr_0 = 8.2761e-04
Loss = 1.2256e-01, PNorm = 121.4575, GNorm = 0.7605, lr_0 = 8.2705e-04
Loss = 1.3955e-01, PNorm = 121.5506, GNorm = 0.6975, lr_0 = 8.2648e-04
Loss = 1.2393e-01, PNorm = 121.6369, GNorm = 0.8374, lr_0 = 8.2591e-04
Loss = 1.1030e-01, PNorm = 121.7214, GNorm = 0.5997, lr_0 = 8.2535e-04
Loss = 1.4050e-01, PNorm = 121.8096, GNorm = 0.7119, lr_0 = 8.2478e-04
Loss = 1.1536e-01, PNorm = 121.8937, GNorm = 0.7449, lr_0 = 8.2422e-04
Loss = 1.4320e-01, PNorm = 121.9876, GNorm = 0.5608, lr_0 = 8.2365e-04
Loss = 1.2046e-01, PNorm = 122.0729, GNorm = 1.0594, lr_0 = 8.2309e-04
Loss = 1.1868e-01, PNorm = 122.1578, GNorm = 0.9405, lr_0 = 8.2252e-04
Loss = 1.1804e-01, PNorm = 122.2464, GNorm = 0.7825, lr_0 = 8.2196e-04
Loss = 1.3916e-01, PNorm = 122.3324, GNorm = 0.8870, lr_0 = 8.2140e-04
Loss = 1.1946e-01, PNorm = 122.4142, GNorm = 1.0259, lr_0 = 8.2084e-04
Loss = 1.1939e-01, PNorm = 122.4934, GNorm = 1.2841, lr_0 = 8.2027e-04
Loss = 1.0813e-01, PNorm = 122.5774, GNorm = 0.5087, lr_0 = 8.1971e-04
Loss = 1.4218e-01, PNorm = 122.6659, GNorm = 0.6233, lr_0 = 8.1915e-04
Loss = 1.2288e-01, PNorm = 122.7678, GNorm = 0.8235, lr_0 = 8.1859e-04
Loss = 1.4589e-01, PNorm = 122.8600, GNorm = 0.8289, lr_0 = 8.1803e-04
Loss = 1.2545e-01, PNorm = 122.9648, GNorm = 0.6370, lr_0 = 8.1747e-04
Loss = 1.1544e-01, PNorm = 123.0551, GNorm = 0.5300, lr_0 = 8.1691e-04
Loss = 1.4718e-01, PNorm = 123.1506, GNorm = 0.9434, lr_0 = 8.1635e-04
Loss = 1.1093e-01, PNorm = 123.2396, GNorm = 0.6379, lr_0 = 8.1579e-04
Loss = 1.0880e-01, PNorm = 123.3348, GNorm = 0.5513, lr_0 = 8.1523e-04
Loss = 1.2189e-01, PNorm = 123.4219, GNorm = 0.8941, lr_0 = 8.1467e-04
Loss = 1.1976e-01, PNorm = 123.5131, GNorm = 0.7606, lr_0 = 8.1411e-04
Loss = 1.4564e-01, PNorm = 123.6077, GNorm = 0.6659, lr_0 = 8.1355e-04
Loss = 1.1487e-01, PNorm = 123.7009, GNorm = 0.6940, lr_0 = 8.1300e-04
Loss = 1.1356e-01, PNorm = 123.7826, GNorm = 0.7180, lr_0 = 8.1244e-04
Loss = 1.4032e-01, PNorm = 123.8826, GNorm = 0.6722, lr_0 = 8.1188e-04
Loss = 1.3932e-01, PNorm = 123.9793, GNorm = 0.7692, lr_0 = 8.1133e-04
Loss = 1.3666e-01, PNorm = 124.0771, GNorm = 0.6474, lr_0 = 8.1077e-04
Loss = 1.5177e-01, PNorm = 124.1820, GNorm = 1.2656, lr_0 = 8.1022e-04
Loss = 1.2857e-01, PNorm = 124.2744, GNorm = 0.8989, lr_0 = 8.0966e-04
Loss = 1.3809e-01, PNorm = 124.3770, GNorm = 0.6490, lr_0 = 8.0911e-04
Loss = 1.2836e-01, PNorm = 124.4740, GNorm = 0.6190, lr_0 = 8.0855e-04
Loss = 1.2961e-01, PNorm = 124.5700, GNorm = 0.6938, lr_0 = 8.0800e-04
Loss = 1.2097e-01, PNorm = 124.6651, GNorm = 1.1037, lr_0 = 8.0745e-04
Loss = 1.1193e-01, PNorm = 124.7589, GNorm = 0.7474, lr_0 = 8.0689e-04
Loss = 1.4292e-01, PNorm = 124.8552, GNorm = 0.8750, lr_0 = 8.0634e-04
Loss = 1.4915e-01, PNorm = 124.9540, GNorm = 0.8848, lr_0 = 8.0579e-04
Loss = 1.3654e-01, PNorm = 125.0597, GNorm = 0.8908, lr_0 = 8.0523e-04
Loss = 1.3035e-01, PNorm = 125.1573, GNorm = 0.7982, lr_0 = 8.0468e-04
Loss = 1.3595e-01, PNorm = 125.2531, GNorm = 0.6659, lr_0 = 8.0413e-04
Loss = 1.5138e-01, PNorm = 125.3488, GNorm = 0.8975, lr_0 = 8.0358e-04
Loss = 1.5152e-01, PNorm = 125.4498, GNorm = 0.9003, lr_0 = 8.0303e-04
Loss = 1.3843e-01, PNorm = 125.5641, GNorm = 0.8066, lr_0 = 8.0248e-04
Loss = 1.2935e-01, PNorm = 125.6707, GNorm = 0.6379, lr_0 = 8.0193e-04
Loss = 1.3594e-01, PNorm = 125.7758, GNorm = 1.2819, lr_0 = 8.0138e-04
Loss = 1.3402e-01, PNorm = 125.8772, GNorm = 0.8684, lr_0 = 8.0083e-04
Loss = 1.6528e-01, PNorm = 125.9747, GNorm = 0.7909, lr_0 = 8.0028e-04
Loss = 1.4400e-01, PNorm = 126.0877, GNorm = 0.9764, lr_0 = 7.9974e-04
Loss = 1.7092e-01, PNorm = 126.1935, GNorm = 0.9489, lr_0 = 7.9919e-04
Loss = 1.6421e-01, PNorm = 126.3162, GNorm = 0.6410, lr_0 = 7.9864e-04
Loss = 1.5860e-01, PNorm = 126.4415, GNorm = 0.6607, lr_0 = 7.9809e-04
Loss = 1.6818e-01, PNorm = 126.5679, GNorm = 1.6340, lr_0 = 7.9755e-04
Loss = 1.4623e-01, PNorm = 126.6816, GNorm = 0.8633, lr_0 = 7.9700e-04
Loss = 1.4556e-01, PNorm = 126.7936, GNorm = 0.8024, lr_0 = 7.9645e-04
Loss = 1.4090e-01, PNorm = 126.8948, GNorm = 0.5903, lr_0 = 7.9591e-04
Loss = 1.5978e-01, PNorm = 127.0089, GNorm = 0.7558, lr_0 = 7.9536e-04
Loss = 1.3498e-01, PNorm = 127.1213, GNorm = 0.5310, lr_0 = 7.9482e-04
Loss = 1.3755e-01, PNorm = 127.2287, GNorm = 0.6565, lr_0 = 7.9427e-04
Loss = 1.2900e-01, PNorm = 127.3243, GNorm = 0.8178, lr_0 = 7.9373e-04
Loss = 1.1924e-01, PNorm = 127.4133, GNorm = 1.0856, lr_0 = 7.9319e-04
Loss = 1.3329e-01, PNorm = 127.5093, GNorm = 0.9859, lr_0 = 7.9264e-04
Loss = 1.3845e-01, PNorm = 127.6095, GNorm = 0.7501, lr_0 = 7.9210e-04
Loss = 1.2485e-01, PNorm = 127.7038, GNorm = 0.7444, lr_0 = 7.9156e-04
Loss = 1.2367e-01, PNorm = 127.7970, GNorm = 0.7508, lr_0 = 7.9101e-04
Loss = 1.2362e-01, PNorm = 127.8900, GNorm = 0.8082, lr_0 = 7.9047e-04
Loss = 1.2550e-01, PNorm = 127.9795, GNorm = 0.6278, lr_0 = 7.8993e-04
Loss = 1.5010e-01, PNorm = 128.0686, GNorm = 0.7004, lr_0 = 7.8939e-04
Loss = 1.4384e-01, PNorm = 128.1639, GNorm = 0.7489, lr_0 = 7.8885e-04
Loss = 1.2668e-01, PNorm = 128.2533, GNorm = 0.6949, lr_0 = 7.8831e-04
Loss = 1.2559e-01, PNorm = 128.3430, GNorm = 1.1375, lr_0 = 7.8777e-04
Loss = 1.4432e-01, PNorm = 128.4414, GNorm = 0.6713, lr_0 = 7.8723e-04
Loss = 1.3601e-01, PNorm = 128.5397, GNorm = 0.5052, lr_0 = 7.8669e-04
Loss = 1.3732e-01, PNorm = 128.6379, GNorm = 1.0317, lr_0 = 7.8615e-04
Loss = 1.5097e-01, PNorm = 128.7437, GNorm = 0.8549, lr_0 = 7.8561e-04
Loss = 1.3228e-01, PNorm = 128.8494, GNorm = 0.4847, lr_0 = 7.8507e-04
Loss = 1.5345e-01, PNorm = 128.9462, GNorm = 0.7242, lr_0 = 7.8454e-04
Loss = 1.2466e-01, PNorm = 129.0445, GNorm = 1.0302, lr_0 = 7.8400e-04
Loss = 1.6414e-01, PNorm = 129.1427, GNorm = 0.9396, lr_0 = 7.8346e-04
Loss = 1.1162e-01, PNorm = 129.2443, GNorm = 0.7625, lr_0 = 7.8293e-04
Loss = 1.3207e-01, PNorm = 129.3306, GNorm = 0.5804, lr_0 = 7.8239e-04
Loss = 1.5407e-01, PNorm = 129.4336, GNorm = 0.8645, lr_0 = 7.8185e-04
Loss = 1.3040e-01, PNorm = 129.5329, GNorm = 0.6205, lr_0 = 7.8132e-04
Validation mae = 0.123880
Epoch 5
Loss = 7.8133e-02, PNorm = 129.6195, GNorm = 0.5682, lr_0 = 7.8078e-04
Loss = 8.2548e-02, PNorm = 129.6895, GNorm = 0.5020, lr_0 = 7.8025e-04
Loss = 8.6906e-02, PNorm = 129.7488, GNorm = 0.4708, lr_0 = 7.7971e-04
Loss = 9.0618e-02, PNorm = 129.8115, GNorm = 0.4881, lr_0 = 7.7918e-04
Loss = 7.5905e-02, PNorm = 129.8801, GNorm = 0.4744, lr_0 = 7.7864e-04
Loss = 8.4181e-02, PNorm = 129.9421, GNorm = 0.5841, lr_0 = 7.7811e-04
Loss = 8.2189e-02, PNorm = 130.0102, GNorm = 1.0933, lr_0 = 7.7758e-04
Loss = 8.6551e-02, PNorm = 130.0711, GNorm = 0.8458, lr_0 = 7.7705e-04
Loss = 8.3612e-02, PNorm = 130.1293, GNorm = 0.5575, lr_0 = 7.7651e-04
Loss = 9.1180e-02, PNorm = 130.2023, GNorm = 0.3705, lr_0 = 7.7598e-04
Loss = 7.7077e-02, PNorm = 130.2646, GNorm = 0.7612, lr_0 = 7.7545e-04
Loss = 8.3588e-02, PNorm = 130.3393, GNorm = 0.4588, lr_0 = 7.7492e-04
Loss = 8.6234e-02, PNorm = 130.4011, GNorm = 0.5417, lr_0 = 7.7439e-04
Loss = 8.3523e-02, PNorm = 130.4702, GNorm = 0.7390, lr_0 = 7.7386e-04
Loss = 7.7920e-02, PNorm = 130.5325, GNorm = 0.5356, lr_0 = 7.7333e-04
Loss = 8.5576e-02, PNorm = 130.5997, GNorm = 0.3210, lr_0 = 7.7280e-04
Loss = 8.0172e-02, PNorm = 130.6579, GNorm = 0.4084, lr_0 = 7.7227e-04
Loss = 8.9525e-02, PNorm = 130.7235, GNorm = 0.7440, lr_0 = 7.7174e-04
Loss = 8.6783e-02, PNorm = 130.7926, GNorm = 1.1118, lr_0 = 7.7121e-04
Loss = 8.1313e-02, PNorm = 130.8528, GNorm = 0.7533, lr_0 = 7.7068e-04
Loss = 8.9681e-02, PNorm = 130.9211, GNorm = 0.8596, lr_0 = 7.7015e-04
Loss = 8.7496e-02, PNorm = 130.9868, GNorm = 0.5597, lr_0 = 7.6963e-04
Loss = 8.6817e-02, PNorm = 131.0546, GNorm = 0.8238, lr_0 = 7.6910e-04
Loss = 9.8446e-02, PNorm = 131.1273, GNorm = 0.6609, lr_0 = 7.6857e-04
Loss = 7.4550e-02, PNorm = 131.1975, GNorm = 0.3975, lr_0 = 7.6805e-04
Loss = 7.2545e-02, PNorm = 131.2554, GNorm = 0.6524, lr_0 = 7.6752e-04
Loss = 7.5752e-02, PNorm = 131.3220, GNorm = 0.4555, lr_0 = 7.6699e-04
Loss = 7.3577e-02, PNorm = 131.3818, GNorm = 0.6853, lr_0 = 7.6647e-04
Loss = 7.7729e-02, PNorm = 131.4397, GNorm = 0.8244, lr_0 = 7.6594e-04
Loss = 9.6225e-02, PNorm = 131.4972, GNorm = 1.0262, lr_0 = 7.6542e-04
Loss = 8.3033e-02, PNorm = 131.5611, GNorm = 0.5890, lr_0 = 7.6489e-04
Loss = 8.7326e-02, PNorm = 131.6171, GNorm = 0.5992, lr_0 = 7.6437e-04
Loss = 6.8674e-02, PNorm = 131.6832, GNorm = 0.6653, lr_0 = 7.6385e-04
Loss = 7.7487e-02, PNorm = 131.7412, GNorm = 0.3913, lr_0 = 7.6332e-04
Loss = 7.9052e-02, PNorm = 131.8020, GNorm = 0.7014, lr_0 = 7.6280e-04
Loss = 9.0323e-02, PNorm = 131.8683, GNorm = 0.4943, lr_0 = 7.6228e-04
Loss = 8.0372e-02, PNorm = 131.9363, GNorm = 0.5567, lr_0 = 7.6176e-04
Loss = 1.1188e-01, PNorm = 132.0118, GNorm = 0.5761, lr_0 = 7.6123e-04
Loss = 9.3674e-02, PNorm = 132.0803, GNorm = 1.0109, lr_0 = 7.6071e-04
Loss = 7.3598e-02, PNorm = 132.1544, GNorm = 0.6281, lr_0 = 7.6019e-04
Loss = 9.9448e-02, PNorm = 132.2316, GNorm = 0.6700, lr_0 = 7.5967e-04
Loss = 9.3410e-02, PNorm = 132.3012, GNorm = 1.1869, lr_0 = 7.5915e-04
Loss = 9.0413e-02, PNorm = 132.3868, GNorm = 0.5735, lr_0 = 7.5863e-04
Loss = 9.4287e-02, PNorm = 132.4529, GNorm = 0.7434, lr_0 = 7.5811e-04
Loss = 8.3166e-02, PNorm = 132.5272, GNorm = 0.6808, lr_0 = 7.5759e-04
Loss = 8.2764e-02, PNorm = 132.6021, GNorm = 0.5637, lr_0 = 7.5707e-04
Loss = 7.6646e-02, PNorm = 132.6671, GNorm = 0.4190, lr_0 = 7.5655e-04
Loss = 6.6477e-02, PNorm = 132.7379, GNorm = 0.5932, lr_0 = 7.5603e-04
Loss = 8.4771e-02, PNorm = 132.8144, GNorm = 0.5115, lr_0 = 7.5552e-04
Loss = 8.3565e-02, PNorm = 132.8830, GNorm = 0.4352, lr_0 = 7.5500e-04
Loss = 8.9252e-02, PNorm = 132.9518, GNorm = 0.5616, lr_0 = 7.5448e-04
Loss = 1.0056e-01, PNorm = 133.0244, GNorm = 0.4847, lr_0 = 7.5397e-04
Loss = 8.4226e-02, PNorm = 133.0974, GNorm = 0.9477, lr_0 = 7.5345e-04
Loss = 7.3431e-02, PNorm = 133.1725, GNorm = 0.3450, lr_0 = 7.5293e-04
Loss = 8.0812e-02, PNorm = 133.2444, GNorm = 0.7991, lr_0 = 7.5242e-04
Loss = 1.0805e-01, PNorm = 133.3037, GNorm = 0.3662, lr_0 = 7.5190e-04
Loss = 9.7480e-02, PNorm = 133.3746, GNorm = 0.4923, lr_0 = 7.5139e-04
Loss = 9.5713e-02, PNorm = 133.4539, GNorm = 0.9097, lr_0 = 7.5087e-04
Loss = 9.0040e-02, PNorm = 133.5241, GNorm = 0.7035, lr_0 = 7.5036e-04
Loss = 8.1097e-02, PNorm = 133.5922, GNorm = 0.4194, lr_0 = 7.4984e-04
Loss = 7.9739e-02, PNorm = 133.6614, GNorm = 0.5421, lr_0 = 7.4933e-04
Loss = 9.0149e-02, PNorm = 133.7358, GNorm = 0.8735, lr_0 = 7.4882e-04
Loss = 9.0725e-02, PNorm = 133.8074, GNorm = 0.5606, lr_0 = 7.4830e-04
Loss = 9.1861e-02, PNorm = 133.8828, GNorm = 0.4709, lr_0 = 7.4779e-04
Loss = 9.1913e-02, PNorm = 133.9615, GNorm = 0.4558, lr_0 = 7.4728e-04
Loss = 7.8944e-02, PNorm = 134.0414, GNorm = 0.5445, lr_0 = 7.4677e-04
Loss = 9.6641e-02, PNorm = 134.1202, GNorm = 0.6283, lr_0 = 7.4625e-04
Loss = 1.0326e-01, PNorm = 134.1987, GNorm = 0.8092, lr_0 = 7.4574e-04
Loss = 8.8104e-02, PNorm = 134.2854, GNorm = 0.6006, lr_0 = 7.4523e-04
Loss = 8.2134e-02, PNorm = 134.3621, GNorm = 0.5185, lr_0 = 7.4472e-04
Loss = 9.5064e-02, PNorm = 134.4359, GNorm = 0.5582, lr_0 = 7.4421e-04
Loss = 8.4485e-02, PNorm = 134.5107, GNorm = 0.6631, lr_0 = 7.4370e-04
Loss = 8.1619e-02, PNorm = 134.5800, GNorm = 0.4450, lr_0 = 7.4319e-04
Loss = 7.6371e-02, PNorm = 134.6559, GNorm = 0.5483, lr_0 = 7.4268e-04
Loss = 8.3191e-02, PNorm = 134.7324, GNorm = 0.7964, lr_0 = 7.4217e-04
Loss = 8.9132e-02, PNorm = 134.8102, GNorm = 0.4683, lr_0 = 7.4167e-04
Loss = 9.0424e-02, PNorm = 134.8923, GNorm = 0.5454, lr_0 = 7.4116e-04
Loss = 8.7777e-02, PNorm = 134.9785, GNorm = 0.5372, lr_0 = 7.4065e-04
Loss = 9.8203e-02, PNorm = 135.0657, GNorm = 0.4066, lr_0 = 7.4014e-04
Loss = 1.1188e-01, PNorm = 135.1410, GNorm = 0.9261, lr_0 = 7.3964e-04
Loss = 6.9718e-02, PNorm = 135.2227, GNorm = 0.5587, lr_0 = 7.3913e-04
Loss = 9.2558e-02, PNorm = 135.2961, GNorm = 0.5606, lr_0 = 7.3862e-04
Loss = 1.1158e-01, PNorm = 135.3745, GNorm = 0.7339, lr_0 = 7.3812e-04
Loss = 9.9573e-02, PNorm = 135.4628, GNorm = 0.6747, lr_0 = 7.3761e-04
Loss = 1.0357e-01, PNorm = 135.5520, GNorm = 0.7779, lr_0 = 7.3711e-04
Loss = 1.2325e-01, PNorm = 135.6368, GNorm = 0.9906, lr_0 = 7.3660e-04
Loss = 8.6664e-02, PNorm = 135.7306, GNorm = 1.0073, lr_0 = 7.3610e-04
Loss = 1.0914e-01, PNorm = 135.8167, GNorm = 0.7008, lr_0 = 7.3559e-04
Loss = 7.8241e-02, PNorm = 135.9012, GNorm = 0.3949, lr_0 = 7.3509e-04
Loss = 7.6950e-02, PNorm = 135.9878, GNorm = 0.4860, lr_0 = 7.3458e-04
Loss = 9.5840e-02, PNorm = 136.0620, GNorm = 0.4897, lr_0 = 7.3408e-04
Loss = 8.2704e-02, PNorm = 136.1409, GNorm = 0.6199, lr_0 = 7.3358e-04
Loss = 8.7533e-02, PNorm = 136.2174, GNorm = 0.7745, lr_0 = 7.3308e-04
Loss = 9.5942e-02, PNorm = 136.2859, GNorm = 0.8722, lr_0 = 7.3257e-04
Loss = 8.2228e-02, PNorm = 136.3629, GNorm = 0.5309, lr_0 = 7.3207e-04
Loss = 8.8332e-02, PNorm = 136.4396, GNorm = 0.5423, lr_0 = 7.3157e-04
Loss = 7.4442e-02, PNorm = 136.5136, GNorm = 0.5985, lr_0 = 7.3107e-04
Loss = 9.3713e-02, PNorm = 136.5934, GNorm = 0.7056, lr_0 = 7.3057e-04
Loss = 9.4696e-02, PNorm = 136.6795, GNorm = 0.6077, lr_0 = 7.3007e-04
Loss = 1.0512e-01, PNorm = 136.7695, GNorm = 1.0728, lr_0 = 7.2957e-04
Loss = 1.0283e-01, PNorm = 136.8645, GNorm = 0.5165, lr_0 = 7.2907e-04
Loss = 8.7406e-02, PNorm = 136.9502, GNorm = 0.8431, lr_0 = 7.2857e-04
Loss = 9.4858e-02, PNorm = 137.0391, GNorm = 0.6147, lr_0 = 7.2807e-04
Loss = 1.0000e-01, PNorm = 137.1296, GNorm = 0.8881, lr_0 = 7.2757e-04
Loss = 9.8325e-02, PNorm = 137.2117, GNorm = 1.7183, lr_0 = 7.2707e-04
Loss = 9.7597e-02, PNorm = 137.2955, GNorm = 0.4917, lr_0 = 7.2657e-04
Loss = 9.3042e-02, PNorm = 137.3789, GNorm = 0.9012, lr_0 = 7.2608e-04
Loss = 1.1389e-01, PNorm = 137.4525, GNorm = 0.8717, lr_0 = 7.2558e-04
Loss = 1.3060e-01, PNorm = 137.5322, GNorm = 1.0199, lr_0 = 7.2508e-04
Loss = 8.9918e-02, PNorm = 137.6174, GNorm = 0.5606, lr_0 = 7.2458e-04
Loss = 9.1255e-02, PNorm = 137.7074, GNorm = 0.5354, lr_0 = 7.2409e-04
Loss = 9.7289e-02, PNorm = 137.7919, GNorm = 0.6079, lr_0 = 7.2359e-04
Loss = 7.9685e-02, PNorm = 137.8761, GNorm = 0.5318, lr_0 = 7.2310e-04
Loss = 1.0820e-01, PNorm = 137.9668, GNorm = 0.8019, lr_0 = 7.2260e-04
Loss = 8.2949e-02, PNorm = 138.0504, GNorm = 0.5381, lr_0 = 7.2211e-04
Loss = 9.9183e-02, PNorm = 138.1344, GNorm = 0.6696, lr_0 = 7.2161e-04
Loss = 9.5289e-02, PNorm = 138.2197, GNorm = 0.7754, lr_0 = 7.2112e-04
Loss = 1.1511e-01, PNorm = 138.3032, GNorm = 1.1693, lr_0 = 7.2062e-04
Loss = 8.0799e-02, PNorm = 138.3841, GNorm = 1.0592, lr_0 = 7.2013e-04
Loss = 1.0976e-01, PNorm = 138.4578, GNorm = 0.5450, lr_0 = 7.1964e-04
Validation mae = 0.123714
Epoch 6
Loss = 6.8889e-02, PNorm = 138.5332, GNorm = 0.3824, lr_0 = 7.1914e-04
Loss = 6.6797e-02, PNorm = 138.5973, GNorm = 0.4481, lr_0 = 7.1865e-04
Loss = 6.2291e-02, PNorm = 138.6564, GNorm = 0.4993, lr_0 = 7.1816e-04
Loss = 6.1829e-02, PNorm = 138.7146, GNorm = 0.4089, lr_0 = 7.1767e-04
Loss = 6.5929e-02, PNorm = 138.7702, GNorm = 0.6190, lr_0 = 7.1717e-04
Loss = 7.6816e-02, PNorm = 138.8251, GNorm = 0.5354, lr_0 = 7.1668e-04
Loss = 6.9823e-02, PNorm = 138.8824, GNorm = 0.5072, lr_0 = 7.1619e-04
Loss = 6.3568e-02, PNorm = 138.9380, GNorm = 0.9055, lr_0 = 7.1570e-04
Loss = 6.7744e-02, PNorm = 138.9935, GNorm = 0.5291, lr_0 = 7.1521e-04
Loss = 6.5372e-02, PNorm = 139.0555, GNorm = 0.5614, lr_0 = 7.1472e-04
Loss = 6.1580e-02, PNorm = 139.1052, GNorm = 0.4241, lr_0 = 7.1423e-04
Loss = 6.2331e-02, PNorm = 139.1581, GNorm = 0.4365, lr_0 = 7.1374e-04
Loss = 6.2046e-02, PNorm = 139.2057, GNorm = 0.4066, lr_0 = 7.1325e-04
Loss = 5.3690e-02, PNorm = 139.2607, GNorm = 0.6417, lr_0 = 7.1277e-04
Loss = 7.3591e-02, PNorm = 139.3017, GNorm = 0.6205, lr_0 = 7.1228e-04
Loss = 5.8743e-02, PNorm = 139.3510, GNorm = 0.6237, lr_0 = 7.1179e-04
Loss = 6.4566e-02, PNorm = 139.4033, GNorm = 0.3732, lr_0 = 7.1130e-04
Loss = 6.4829e-02, PNorm = 139.4498, GNorm = 0.3054, lr_0 = 7.1081e-04
Loss = 6.3873e-02, PNorm = 139.5065, GNorm = 0.7229, lr_0 = 7.1033e-04
Loss = 7.7997e-02, PNorm = 139.5669, GNorm = 0.4980, lr_0 = 7.0984e-04
Loss = 6.7478e-02, PNorm = 139.6200, GNorm = 0.8872, lr_0 = 7.0935e-04
Loss = 7.0978e-02, PNorm = 139.6769, GNorm = 0.9429, lr_0 = 7.0887e-04
Loss = 8.5352e-02, PNorm = 139.7328, GNorm = 0.5962, lr_0 = 7.0838e-04
Loss = 6.8789e-02, PNorm = 139.7911, GNorm = 0.5859, lr_0 = 7.0790e-04
Loss = 6.8026e-02, PNorm = 139.8512, GNorm = 0.8158, lr_0 = 7.0741e-04
Loss = 6.8656e-02, PNorm = 139.9077, GNorm = 0.8990, lr_0 = 7.0693e-04
Loss = 6.2600e-02, PNorm = 139.9665, GNorm = 0.5119, lr_0 = 7.0644e-04
Loss = 6.6468e-02, PNorm = 140.0241, GNorm = 0.3842, lr_0 = 7.0596e-04
Loss = 6.1002e-02, PNorm = 140.0796, GNorm = 0.4983, lr_0 = 7.0548e-04
Loss = 4.8525e-02, PNorm = 140.1323, GNorm = 0.5566, lr_0 = 7.0499e-04
Loss = 5.4153e-02, PNorm = 140.1763, GNorm = 0.4364, lr_0 = 7.0451e-04
Loss = 5.4384e-02, PNorm = 140.2215, GNorm = 0.6086, lr_0 = 7.0403e-04
Loss = 6.4350e-02, PNorm = 140.2721, GNorm = 0.5951, lr_0 = 7.0354e-04
Loss = 5.4644e-02, PNorm = 140.3234, GNorm = 0.4254, lr_0 = 7.0306e-04
Loss = 5.1921e-02, PNorm = 140.3784, GNorm = 0.5673, lr_0 = 7.0258e-04
Loss = 5.5403e-02, PNorm = 140.4267, GNorm = 0.5044, lr_0 = 7.0210e-04
Loss = 6.4237e-02, PNorm = 140.4755, GNorm = 0.3522, lr_0 = 7.0162e-04
Loss = 5.7550e-02, PNorm = 140.5285, GNorm = 0.5797, lr_0 = 7.0114e-04
Loss = 6.2044e-02, PNorm = 140.5875, GNorm = 0.4967, lr_0 = 7.0066e-04
Loss = 5.5540e-02, PNorm = 140.6406, GNorm = 0.3182, lr_0 = 7.0018e-04
Loss = 6.5816e-02, PNorm = 140.6960, GNorm = 0.5648, lr_0 = 6.9970e-04
Loss = 6.3970e-02, PNorm = 140.7560, GNorm = 0.4596, lr_0 = 6.9922e-04
Loss = 5.3715e-02, PNorm = 140.8099, GNorm = 0.4297, lr_0 = 6.9874e-04
Loss = 6.8353e-02, PNorm = 140.8593, GNorm = 0.2869, lr_0 = 6.9826e-04
Loss = 5.3486e-02, PNorm = 140.9092, GNorm = 0.3535, lr_0 = 6.9778e-04
Loss = 5.3666e-02, PNorm = 140.9642, GNorm = 0.3313, lr_0 = 6.9730e-04
Loss = 6.1382e-02, PNorm = 141.0181, GNorm = 0.7584, lr_0 = 6.9683e-04
Loss = 5.7743e-02, PNorm = 141.0749, GNorm = 0.4974, lr_0 = 6.9635e-04
Loss = 6.5542e-02, PNorm = 141.1311, GNorm = 0.5701, lr_0 = 6.9587e-04
Loss = 6.5779e-02, PNorm = 141.1860, GNorm = 0.5366, lr_0 = 6.9540e-04
Loss = 5.5735e-02, PNorm = 141.2392, GNorm = 0.5149, lr_0 = 6.9492e-04
Loss = 6.2460e-02, PNorm = 141.2957, GNorm = 0.6574, lr_0 = 6.9444e-04
Loss = 6.1444e-02, PNorm = 141.3572, GNorm = 0.4560, lr_0 = 6.9397e-04
Loss = 7.4142e-02, PNorm = 141.4166, GNorm = 0.6662, lr_0 = 6.9349e-04
Loss = 6.0651e-02, PNorm = 141.4769, GNorm = 0.4583, lr_0 = 6.9302e-04
Loss = 6.9646e-02, PNorm = 141.5421, GNorm = 0.5357, lr_0 = 6.9254e-04
Loss = 6.5598e-02, PNorm = 141.6012, GNorm = 0.4165, lr_0 = 6.9207e-04
Loss = 7.4423e-02, PNorm = 141.6668, GNorm = 1.0271, lr_0 = 6.9159e-04
Loss = 6.4170e-02, PNorm = 141.7276, GNorm = 0.4914, lr_0 = 6.9112e-04
Loss = 5.8548e-02, PNorm = 141.7884, GNorm = 0.8163, lr_0 = 6.9065e-04
Loss = 6.2896e-02, PNorm = 141.8471, GNorm = 0.6783, lr_0 = 6.9017e-04
Loss = 6.7594e-02, PNorm = 141.9070, GNorm = 0.8204, lr_0 = 6.8970e-04
Loss = 5.2837e-02, PNorm = 141.9649, GNorm = 0.4878, lr_0 = 6.8923e-04
Loss = 6.6293e-02, PNorm = 142.0255, GNorm = 0.6892, lr_0 = 6.8876e-04
Loss = 5.9386e-02, PNorm = 142.0904, GNorm = 0.5932, lr_0 = 6.8828e-04
Loss = 7.2851e-02, PNorm = 142.1563, GNorm = 0.5820, lr_0 = 6.8781e-04
Loss = 5.5482e-02, PNorm = 142.2249, GNorm = 0.5380, lr_0 = 6.8734e-04
Loss = 6.5840e-02, PNorm = 142.2882, GNorm = 0.5944, lr_0 = 6.8687e-04
Loss = 6.5190e-02, PNorm = 142.3583, GNorm = 0.7581, lr_0 = 6.8640e-04
Loss = 6.5998e-02, PNorm = 142.4209, GNorm = 0.3941, lr_0 = 6.8593e-04
Loss = 7.0187e-02, PNorm = 142.4822, GNorm = 0.6008, lr_0 = 6.8546e-04
Loss = 7.7667e-02, PNorm = 142.5495, GNorm = 0.4182, lr_0 = 6.8499e-04
Loss = 5.8019e-02, PNorm = 142.6157, GNorm = 0.4926, lr_0 = 6.8452e-04
Loss = 6.3485e-02, PNorm = 142.6826, GNorm = 0.3774, lr_0 = 6.8405e-04
Loss = 6.6548e-02, PNorm = 142.7457, GNorm = 0.5013, lr_0 = 6.8358e-04
Loss = 5.2495e-02, PNorm = 142.8071, GNorm = 0.6815, lr_0 = 6.8312e-04
Loss = 7.4493e-02, PNorm = 142.8732, GNorm = 0.3990, lr_0 = 6.8265e-04
Loss = 6.5292e-02, PNorm = 142.9379, GNorm = 0.5037, lr_0 = 6.8218e-04
Loss = 8.3955e-02, PNorm = 143.0106, GNorm = 0.6172, lr_0 = 6.8171e-04
Loss = 7.2415e-02, PNorm = 143.0809, GNorm = 0.4420, lr_0 = 6.8125e-04
Loss = 6.8957e-02, PNorm = 143.1462, GNorm = 0.7509, lr_0 = 6.8078e-04
Loss = 6.0358e-02, PNorm = 143.2184, GNorm = 1.0644, lr_0 = 6.8031e-04
Loss = 6.2271e-02, PNorm = 143.2808, GNorm = 0.4906, lr_0 = 6.7985e-04
Loss = 6.1815e-02, PNorm = 143.3437, GNorm = 0.7650, lr_0 = 6.7938e-04
Loss = 6.9868e-02, PNorm = 143.4062, GNorm = 0.6580, lr_0 = 6.7892e-04
Loss = 6.0382e-02, PNorm = 143.4700, GNorm = 0.7428, lr_0 = 6.7845e-04
Loss = 6.7168e-02, PNorm = 143.5330, GNorm = 1.0331, lr_0 = 6.7799e-04
Loss = 6.7255e-02, PNorm = 143.6024, GNorm = 0.8732, lr_0 = 6.7752e-04
Loss = 7.4049e-02, PNorm = 143.6702, GNorm = 0.8040, lr_0 = 6.7706e-04
Loss = 5.2945e-02, PNorm = 143.7342, GNorm = 0.4715, lr_0 = 6.7659e-04
Loss = 5.8458e-02, PNorm = 143.8011, GNorm = 0.6168, lr_0 = 6.7613e-04
Loss = 6.4645e-02, PNorm = 143.8617, GNorm = 0.4778, lr_0 = 6.7567e-04
Loss = 7.3171e-02, PNorm = 143.9285, GNorm = 0.5447, lr_0 = 6.7520e-04
Loss = 6.4125e-02, PNorm = 143.9971, GNorm = 0.5556, lr_0 = 6.7474e-04
Loss = 6.2505e-02, PNorm = 144.0629, GNorm = 0.4495, lr_0 = 6.7428e-04
Loss = 6.0159e-02, PNorm = 144.1287, GNorm = 0.4942, lr_0 = 6.7382e-04
Loss = 6.3867e-02, PNorm = 144.1945, GNorm = 0.5906, lr_0 = 6.7335e-04
Loss = 6.2867e-02, PNorm = 144.2595, GNorm = 0.5552, lr_0 = 6.7289e-04
Loss = 5.4891e-02, PNorm = 144.3223, GNorm = 0.3828, lr_0 = 6.7243e-04
Loss = 7.4261e-02, PNorm = 144.3779, GNorm = 0.3771, lr_0 = 6.7197e-04
Loss = 7.1193e-02, PNorm = 144.4437, GNorm = 1.4747, lr_0 = 6.7151e-04
Loss = 6.3904e-02, PNorm = 144.5137, GNorm = 0.5377, lr_0 = 6.7105e-04
Loss = 6.9547e-02, PNorm = 144.5826, GNorm = 0.9252, lr_0 = 6.7059e-04
Loss = 6.8315e-02, PNorm = 144.6555, GNorm = 0.5335, lr_0 = 6.7013e-04
Loss = 6.8319e-02, PNorm = 144.7251, GNorm = 0.5540, lr_0 = 6.6967e-04
Loss = 6.1842e-02, PNorm = 144.7911, GNorm = 0.9519, lr_0 = 6.6921e-04
Loss = 8.4463e-02, PNorm = 144.8597, GNorm = 0.7228, lr_0 = 6.6876e-04
Loss = 7.3374e-02, PNorm = 144.9362, GNorm = 0.6739, lr_0 = 6.6830e-04
Loss = 7.2075e-02, PNorm = 145.0082, GNorm = 0.4658, lr_0 = 6.6784e-04
Loss = 7.6234e-02, PNorm = 145.0860, GNorm = 0.4291, lr_0 = 6.6738e-04
Loss = 6.5398e-02, PNorm = 145.1524, GNorm = 0.4350, lr_0 = 6.6693e-04
Loss = 8.0440e-02, PNorm = 145.2199, GNorm = 0.4561, lr_0 = 6.6647e-04
Loss = 7.2694e-02, PNorm = 145.2891, GNorm = 0.3938, lr_0 = 6.6601e-04
Loss = 7.4191e-02, PNorm = 145.3601, GNorm = 0.7236, lr_0 = 6.6556e-04
Loss = 7.2372e-02, PNorm = 145.4371, GNorm = 0.5443, lr_0 = 6.6510e-04
Loss = 6.8391e-02, PNorm = 145.5174, GNorm = 0.5394, lr_0 = 6.6464e-04
Loss = 7.1840e-02, PNorm = 145.5927, GNorm = 0.9337, lr_0 = 6.6419e-04
Loss = 6.7703e-02, PNorm = 145.6556, GNorm = 0.3909, lr_0 = 6.6373e-04
Loss = 7.2345e-02, PNorm = 145.7280, GNorm = 0.6105, lr_0 = 6.6328e-04
Loss = 8.3531e-02, PNorm = 145.8011, GNorm = 0.5798, lr_0 = 6.6282e-04
Validation mae = 0.123923
Epoch 7
Loss = 4.7798e-02, PNorm = 145.8684, GNorm = 0.3335, lr_0 = 6.6237e-04
Loss = 5.0125e-02, PNorm = 145.9274, GNorm = 0.5586, lr_0 = 6.6192e-04
Loss = 4.3264e-02, PNorm = 145.9809, GNorm = 0.5881, lr_0 = 6.6146e-04
Loss = 6.2958e-02, PNorm = 146.0219, GNorm = 0.6095, lr_0 = 6.6101e-04
Loss = 4.4546e-02, PNorm = 146.0693, GNorm = 0.4465, lr_0 = 6.6056e-04
Loss = 4.2291e-02, PNorm = 146.1090, GNorm = 0.4610, lr_0 = 6.6011e-04
Loss = 4.3613e-02, PNorm = 146.1492, GNorm = 0.5193, lr_0 = 6.5965e-04
Loss = 5.6802e-02, PNorm = 146.1899, GNorm = 0.5761, lr_0 = 6.5920e-04
Loss = 5.0195e-02, PNorm = 146.2320, GNorm = 0.6548, lr_0 = 6.5875e-04
Loss = 4.3433e-02, PNorm = 146.2736, GNorm = 0.3411, lr_0 = 6.5830e-04
Loss = 4.4923e-02, PNorm = 146.3127, GNorm = 0.3401, lr_0 = 6.5785e-04
Loss = 5.5426e-02, PNorm = 146.3552, GNorm = 0.4461, lr_0 = 6.5740e-04
Loss = 4.3109e-02, PNorm = 146.3960, GNorm = 0.4672, lr_0 = 6.5695e-04
Loss = 4.8051e-02, PNorm = 146.4341, GNorm = 0.3711, lr_0 = 6.5650e-04
Loss = 4.1267e-02, PNorm = 146.4717, GNorm = 0.5018, lr_0 = 6.5605e-04
Loss = 4.6538e-02, PNorm = 146.5146, GNorm = 0.5366, lr_0 = 6.5560e-04
Loss = 5.7485e-02, PNorm = 146.5619, GNorm = 0.7527, lr_0 = 6.5515e-04
Loss = 4.1478e-02, PNorm = 146.6086, GNorm = 0.4921, lr_0 = 6.5470e-04
Loss = 5.0636e-02, PNorm = 146.6516, GNorm = 0.4049, lr_0 = 6.5425e-04
Loss = 5.4225e-02, PNorm = 146.6997, GNorm = 0.5257, lr_0 = 6.5380e-04
Loss = 4.3306e-02, PNorm = 146.7466, GNorm = 1.1662, lr_0 = 6.5335e-04
Loss = 5.0300e-02, PNorm = 146.7912, GNorm = 0.5302, lr_0 = 6.5291e-04
Loss = 5.8148e-02, PNorm = 146.8389, GNorm = 0.3697, lr_0 = 6.5246e-04
Loss = 5.3130e-02, PNorm = 146.8798, GNorm = 0.4261, lr_0 = 6.5201e-04
Loss = 4.8887e-02, PNorm = 146.9244, GNorm = 0.3016, lr_0 = 6.5157e-04
Loss = 5.7167e-02, PNorm = 146.9608, GNorm = 0.3522, lr_0 = 6.5112e-04
Loss = 4.0372e-02, PNorm = 147.0007, GNorm = 0.4212, lr_0 = 6.5067e-04
Loss = 4.5278e-02, PNorm = 147.0413, GNorm = 0.4681, lr_0 = 6.5023e-04
Loss = 5.2298e-02, PNorm = 147.0918, GNorm = 0.6114, lr_0 = 6.4978e-04
Loss = 4.8011e-02, PNorm = 147.1359, GNorm = 0.8028, lr_0 = 6.4934e-04
Loss = 6.3177e-02, PNorm = 147.1785, GNorm = 0.5655, lr_0 = 6.4889e-04
Loss = 4.9383e-02, PNorm = 147.2309, GNorm = 0.3800, lr_0 = 6.4845e-04
Loss = 5.3902e-02, PNorm = 147.2895, GNorm = 0.6456, lr_0 = 6.4800e-04
Loss = 5.6887e-02, PNorm = 147.3442, GNorm = 0.4223, lr_0 = 6.4756e-04
Loss = 4.0336e-02, PNorm = 147.3906, GNorm = 0.4714, lr_0 = 6.4712e-04
Loss = 5.7303e-02, PNorm = 147.4434, GNorm = 0.7067, lr_0 = 6.4667e-04
Loss = 4.5749e-02, PNorm = 147.4849, GNorm = 0.5850, lr_0 = 6.4623e-04
Loss = 5.3205e-02, PNorm = 147.5340, GNorm = 0.2831, lr_0 = 6.4579e-04
Loss = 4.3184e-02, PNorm = 147.5793, GNorm = 0.2658, lr_0 = 6.4534e-04
Loss = 4.7446e-02, PNorm = 147.6239, GNorm = 0.3975, lr_0 = 6.4490e-04
Loss = 4.3263e-02, PNorm = 147.6677, GNorm = 0.7883, lr_0 = 6.4446e-04
Loss = 4.9371e-02, PNorm = 147.7050, GNorm = 0.4074, lr_0 = 6.4402e-04
Loss = 4.7125e-02, PNorm = 147.7553, GNorm = 0.5401, lr_0 = 6.4358e-04
Loss = 4.7538e-02, PNorm = 147.8071, GNorm = 0.7426, lr_0 = 6.4314e-04
Loss = 4.6303e-02, PNorm = 147.8588, GNorm = 0.5336, lr_0 = 6.4270e-04
Loss = 5.9014e-02, PNorm = 147.9062, GNorm = 1.6799, lr_0 = 6.4226e-04
Loss = 4.4260e-02, PNorm = 147.9605, GNorm = 0.5056, lr_0 = 6.4182e-04
Loss = 4.7744e-02, PNorm = 148.0048, GNorm = 0.4554, lr_0 = 6.4138e-04
Loss = 5.5639e-02, PNorm = 148.0501, GNorm = 0.6307, lr_0 = 6.4094e-04
Loss = 4.5093e-02, PNorm = 148.1021, GNorm = 0.6186, lr_0 = 6.4050e-04
Loss = 4.9715e-02, PNorm = 148.1474, GNorm = 0.3828, lr_0 = 6.4006e-04
Loss = 4.6417e-02, PNorm = 148.1990, GNorm = 0.9566, lr_0 = 6.3962e-04
Loss = 4.3681e-02, PNorm = 148.2492, GNorm = 0.2675, lr_0 = 6.3918e-04
Loss = 4.4889e-02, PNorm = 148.3025, GNorm = 0.3020, lr_0 = 6.3874e-04
Loss = 5.5348e-02, PNorm = 148.3631, GNorm = 0.4472, lr_0 = 6.3831e-04
Loss = 4.1785e-02, PNorm = 148.4121, GNorm = 0.2476, lr_0 = 6.3787e-04
Loss = 4.6776e-02, PNorm = 148.4600, GNorm = 0.6187, lr_0 = 6.3743e-04
Loss = 4.4724e-02, PNorm = 148.5085, GNorm = 0.3717, lr_0 = 6.3700e-04
Loss = 4.0149e-02, PNorm = 148.5605, GNorm = 0.5357, lr_0 = 6.3656e-04
Loss = 4.6353e-02, PNorm = 148.6121, GNorm = 0.3241, lr_0 = 6.3612e-04
Loss = 5.8361e-02, PNorm = 148.6600, GNorm = 0.4392, lr_0 = 6.3569e-04
Loss = 4.7858e-02, PNorm = 148.7128, GNorm = 0.7618, lr_0 = 6.3525e-04
Loss = 4.2668e-02, PNorm = 148.7628, GNorm = 0.5087, lr_0 = 6.3482e-04
Loss = 4.5259e-02, PNorm = 148.8079, GNorm = 0.6249, lr_0 = 6.3438e-04
Loss = 4.7742e-02, PNorm = 148.8583, GNorm = 0.3843, lr_0 = 6.3395e-04
Loss = 3.8821e-02, PNorm = 148.9119, GNorm = 0.3582, lr_0 = 6.3351e-04
Loss = 5.3409e-02, PNorm = 148.9587, GNorm = 0.5599, lr_0 = 6.3308e-04
Loss = 4.4282e-02, PNorm = 149.0072, GNorm = 0.3932, lr_0 = 6.3265e-04
Loss = 5.7255e-02, PNorm = 149.0616, GNorm = 0.5182, lr_0 = 6.3221e-04
Loss = 5.1430e-02, PNorm = 149.1208, GNorm = 0.6420, lr_0 = 6.3178e-04
Loss = 5.5585e-02, PNorm = 149.1799, GNorm = 0.5238, lr_0 = 6.3135e-04
Loss = 5.8871e-02, PNorm = 149.2385, GNorm = 1.8333, lr_0 = 6.3091e-04
Loss = 5.2482e-02, PNorm = 149.2988, GNorm = 0.6095, lr_0 = 6.3048e-04
Loss = 5.5427e-02, PNorm = 149.3560, GNorm = 2.6188, lr_0 = 6.3005e-04
Loss = 4.7964e-02, PNorm = 149.4069, GNorm = 0.4182, lr_0 = 6.2962e-04
Loss = 5.4653e-02, PNorm = 149.4601, GNorm = 0.4827, lr_0 = 6.2919e-04
Loss = 5.1068e-02, PNorm = 149.5108, GNorm = 0.4565, lr_0 = 6.2876e-04
Loss = 5.4092e-02, PNorm = 149.5657, GNorm = 0.8541, lr_0 = 6.2833e-04
Loss = 5.3387e-02, PNorm = 149.6160, GNorm = 0.7856, lr_0 = 6.2789e-04
Loss = 4.3798e-02, PNorm = 149.6789, GNorm = 0.4019, lr_0 = 6.2746e-04
Loss = 5.6804e-02, PNorm = 149.7301, GNorm = 0.3288, lr_0 = 6.2703e-04
Loss = 4.8158e-02, PNorm = 149.7837, GNorm = 0.6111, lr_0 = 6.2661e-04
Loss = 6.8200e-02, PNorm = 149.8394, GNorm = 0.4308, lr_0 = 6.2618e-04
Loss = 7.1968e-02, PNorm = 149.8941, GNorm = 0.3935, lr_0 = 6.2575e-04
Loss = 5.4657e-02, PNorm = 149.9578, GNorm = 0.5205, lr_0 = 6.2532e-04
Loss = 4.9491e-02, PNorm = 150.0101, GNorm = 0.5722, lr_0 = 6.2489e-04
Loss = 5.1242e-02, PNorm = 150.0679, GNorm = 0.5291, lr_0 = 6.2446e-04
Loss = 4.6675e-02, PNorm = 150.1267, GNorm = 0.6381, lr_0 = 6.2403e-04
Loss = 5.0391e-02, PNorm = 150.1851, GNorm = 0.8508, lr_0 = 6.2361e-04
Loss = 4.5219e-02, PNorm = 150.2385, GNorm = 0.7472, lr_0 = 6.2318e-04
Loss = 5.7641e-02, PNorm = 150.3008, GNorm = 0.4524, lr_0 = 6.2275e-04
Loss = 5.2064e-02, PNorm = 150.3563, GNorm = 0.5220, lr_0 = 6.2233e-04
Loss = 5.3730e-02, PNorm = 150.4167, GNorm = 0.6902, lr_0 = 6.2190e-04
Loss = 6.3614e-02, PNorm = 150.4825, GNorm = 0.5020, lr_0 = 6.2147e-04
Loss = 5.2996e-02, PNorm = 150.5409, GNorm = 0.4474, lr_0 = 6.2105e-04
Loss = 5.5185e-02, PNorm = 150.5986, GNorm = 1.0805, lr_0 = 6.2062e-04
Loss = 5.8450e-02, PNorm = 150.6594, GNorm = 0.7481, lr_0 = 6.2020e-04
Loss = 6.0108e-02, PNorm = 150.7172, GNorm = 0.4762, lr_0 = 6.1977e-04
Loss = 5.9509e-02, PNorm = 150.7735, GNorm = 0.4100, lr_0 = 6.1935e-04
Loss = 5.5750e-02, PNorm = 150.8316, GNorm = 0.4400, lr_0 = 6.1892e-04
Loss = 5.8982e-02, PNorm = 150.8934, GNorm = 0.8927, lr_0 = 6.1850e-04
Loss = 6.6151e-02, PNorm = 150.9455, GNorm = 0.3675, lr_0 = 6.1808e-04
Loss = 5.2317e-02, PNorm = 151.0046, GNorm = 0.3792, lr_0 = 6.1765e-04
Loss = 4.8621e-02, PNorm = 151.0657, GNorm = 0.4176, lr_0 = 6.1723e-04
Loss = 6.6314e-02, PNorm = 151.1198, GNorm = 2.2732, lr_0 = 6.1681e-04
Loss = 5.9532e-02, PNorm = 151.1758, GNorm = 0.5900, lr_0 = 6.1638e-04
Loss = 5.6859e-02, PNorm = 151.2327, GNorm = 0.6649, lr_0 = 6.1596e-04
Loss = 6.4227e-02, PNorm = 151.2979, GNorm = 1.2279, lr_0 = 6.1554e-04
Loss = 5.0573e-02, PNorm = 151.3547, GNorm = 0.5792, lr_0 = 6.1512e-04
Loss = 5.5249e-02, PNorm = 151.4145, GNorm = 0.4196, lr_0 = 6.1470e-04
Loss = 4.9476e-02, PNorm = 151.4694, GNorm = 0.4652, lr_0 = 6.1428e-04
Loss = 4.8379e-02, PNorm = 151.5263, GNorm = 0.6009, lr_0 = 6.1385e-04
Loss = 6.3242e-02, PNorm = 151.5823, GNorm = 0.4237, lr_0 = 6.1343e-04
Loss = 5.9340e-02, PNorm = 151.6435, GNorm = 0.7798, lr_0 = 6.1301e-04
Loss = 5.0021e-02, PNorm = 151.7100, GNorm = 0.5360, lr_0 = 6.1259e-04
Loss = 5.4886e-02, PNorm = 151.7711, GNorm = 0.3715, lr_0 = 6.1217e-04
Loss = 7.3005e-02, PNorm = 151.8240, GNorm = 0.4491, lr_0 = 6.1175e-04
Loss = 6.0284e-02, PNorm = 151.8828, GNorm = 0.7234, lr_0 = 6.1134e-04
Loss = 4.6156e-02, PNorm = 151.9440, GNorm = 0.3788, lr_0 = 6.1092e-04
Loss = 5.5087e-02, PNorm = 152.0084, GNorm = 0.4453, lr_0 = 6.1050e-04
Validation mae = 0.123247
Epoch 8
Loss = 5.0914e-02, PNorm = 152.0631, GNorm = 0.9247, lr_0 = 6.1008e-04
Loss = 4.1665e-02, PNorm = 152.1107, GNorm = 0.8035, lr_0 = 6.0966e-04
Loss = 5.1188e-02, PNorm = 152.1486, GNorm = 0.2441, lr_0 = 6.0924e-04
Loss = 4.1062e-02, PNorm = 152.1894, GNorm = 0.5111, lr_0 = 6.0883e-04
Loss = 3.6268e-02, PNorm = 152.2266, GNorm = 0.4147, lr_0 = 6.0841e-04
Loss = 3.8561e-02, PNorm = 152.2662, GNorm = 0.3478, lr_0 = 6.0799e-04
Loss = 4.4760e-02, PNorm = 152.3034, GNorm = 0.7817, lr_0 = 6.0758e-04
Loss = 4.0846e-02, PNorm = 152.3487, GNorm = 0.2931, lr_0 = 6.0716e-04
Loss = 3.8331e-02, PNorm = 152.3914, GNorm = 0.4705, lr_0 = 6.0674e-04
Loss = 4.4205e-02, PNorm = 152.4310, GNorm = 0.3631, lr_0 = 6.0633e-04
Loss = 3.6380e-02, PNorm = 152.4681, GNorm = 0.4390, lr_0 = 6.0591e-04
Loss = 4.3152e-02, PNorm = 152.5094, GNorm = 0.4624, lr_0 = 6.0550e-04
Loss = 4.3319e-02, PNorm = 152.5455, GNorm = 0.3574, lr_0 = 6.0508e-04
Loss = 4.0720e-02, PNorm = 152.5862, GNorm = 0.3373, lr_0 = 6.0467e-04
Loss = 4.5391e-02, PNorm = 152.6235, GNorm = 0.5077, lr_0 = 6.0425e-04
Loss = 4.7066e-02, PNorm = 152.6647, GNorm = 0.6572, lr_0 = 6.0384e-04
Loss = 3.6614e-02, PNorm = 152.7096, GNorm = 0.3141, lr_0 = 6.0343e-04
Loss = 4.7654e-02, PNorm = 152.7549, GNorm = 0.2056, lr_0 = 6.0301e-04
Loss = 4.4507e-02, PNorm = 152.7991, GNorm = 0.3949, lr_0 = 6.0260e-04
Loss = 3.7513e-02, PNorm = 152.8444, GNorm = 0.3223, lr_0 = 6.0219e-04
Loss = 3.8701e-02, PNorm = 152.8831, GNorm = 0.4807, lr_0 = 6.0178e-04
Loss = 4.3340e-02, PNorm = 152.9221, GNorm = 0.6915, lr_0 = 6.0136e-04
Loss = 3.2477e-02, PNorm = 152.9596, GNorm = 0.2986, lr_0 = 6.0095e-04
Loss = 3.8236e-02, PNorm = 152.9939, GNorm = 0.4420, lr_0 = 6.0054e-04
Loss = 3.5548e-02, PNorm = 153.0321, GNorm = 0.3807, lr_0 = 6.0013e-04
Loss = 4.1483e-02, PNorm = 153.0731, GNorm = 0.4034, lr_0 = 5.9972e-04
Loss = 3.9601e-02, PNorm = 153.1124, GNorm = 0.5331, lr_0 = 5.9931e-04
Loss = 3.5435e-02, PNorm = 153.1487, GNorm = 0.7220, lr_0 = 5.9890e-04
Loss = 4.0567e-02, PNorm = 153.1933, GNorm = 0.4506, lr_0 = 5.9849e-04
Loss = 5.2746e-02, PNorm = 153.2370, GNorm = 0.4243, lr_0 = 5.9808e-04
Loss = 3.4659e-02, PNorm = 153.2819, GNorm = 0.5996, lr_0 = 5.9767e-04
Loss = 4.3021e-02, PNorm = 153.3288, GNorm = 0.5772, lr_0 = 5.9726e-04
Loss = 4.2628e-02, PNorm = 153.3752, GNorm = 0.5618, lr_0 = 5.9685e-04
Loss = 4.5193e-02, PNorm = 153.4143, GNorm = 0.8500, lr_0 = 5.9644e-04
Loss = 4.2525e-02, PNorm = 153.4629, GNorm = 0.6864, lr_0 = 5.9603e-04
Loss = 3.5968e-02, PNorm = 153.5020, GNorm = 0.2572, lr_0 = 5.9562e-04
Loss = 3.8683e-02, PNorm = 153.5482, GNorm = 0.3928, lr_0 = 5.9521e-04
Loss = 4.2783e-02, PNorm = 153.5861, GNorm = 0.7102, lr_0 = 5.9481e-04
Loss = 3.4689e-02, PNorm = 153.6274, GNorm = 0.3347, lr_0 = 5.9440e-04
Loss = 5.0276e-02, PNorm = 153.6732, GNorm = 0.5873, lr_0 = 5.9399e-04
Loss = 3.3172e-02, PNorm = 153.7229, GNorm = 0.3331, lr_0 = 5.9358e-04
Loss = 3.3269e-02, PNorm = 153.7618, GNorm = 0.5566, lr_0 = 5.9318e-04
Loss = 3.7922e-02, PNorm = 153.8038, GNorm = 0.3029, lr_0 = 5.9277e-04
Loss = 3.3868e-02, PNorm = 153.8412, GNorm = 0.3410, lr_0 = 5.9236e-04
Loss = 3.7288e-02, PNorm = 153.8812, GNorm = 0.2778, lr_0 = 5.9196e-04
Loss = 3.5837e-02, PNorm = 153.9223, GNorm = 0.4333, lr_0 = 5.9155e-04
Loss = 4.5286e-02, PNorm = 153.9670, GNorm = 0.4413, lr_0 = 5.9115e-04
Loss = 2.9075e-02, PNorm = 154.0090, GNorm = 0.8082, lr_0 = 5.9074e-04
Loss = 5.5177e-02, PNorm = 154.0488, GNorm = 0.2845, lr_0 = 5.9034e-04
Loss = 3.6257e-02, PNorm = 154.0914, GNorm = 0.3835, lr_0 = 5.8993e-04
Loss = 4.4072e-02, PNorm = 154.1370, GNorm = 0.5062, lr_0 = 5.8953e-04
Loss = 3.3707e-02, PNorm = 154.1787, GNorm = 0.3901, lr_0 = 5.8913e-04
Loss = 5.0187e-02, PNorm = 154.2263, GNorm = 0.8713, lr_0 = 5.8872e-04
Loss = 3.7197e-02, PNorm = 154.2664, GNorm = 0.4003, lr_0 = 5.8832e-04
Loss = 4.1657e-02, PNorm = 154.3154, GNorm = 0.3105, lr_0 = 5.8792e-04
Loss = 4.3277e-02, PNorm = 154.3585, GNorm = 0.3067, lr_0 = 5.8751e-04
Loss = 4.1752e-02, PNorm = 154.4054, GNorm = 0.5679, lr_0 = 5.8711e-04
Loss = 4.5180e-02, PNorm = 154.4508, GNorm = 0.6888, lr_0 = 5.8671e-04
Loss = 4.2341e-02, PNorm = 154.4973, GNorm = 0.3985, lr_0 = 5.8631e-04
Loss = 4.2283e-02, PNorm = 154.5462, GNorm = 0.5516, lr_0 = 5.8591e-04
Loss = 5.1380e-02, PNorm = 154.5962, GNorm = 0.3970, lr_0 = 5.8550e-04
Loss = 3.5542e-02, PNorm = 154.6443, GNorm = 0.3009, lr_0 = 5.8510e-04
Loss = 4.0708e-02, PNorm = 154.6921, GNorm = 0.3467, lr_0 = 5.8470e-04
Loss = 3.5738e-02, PNorm = 154.7441, GNorm = 0.3429, lr_0 = 5.8430e-04
Loss = 3.9195e-02, PNorm = 154.7855, GNorm = 0.2571, lr_0 = 5.8390e-04
Loss = 4.0209e-02, PNorm = 154.8316, GNorm = 0.6134, lr_0 = 5.8350e-04
Loss = 3.6207e-02, PNorm = 154.8794, GNorm = 0.2922, lr_0 = 5.8310e-04
Loss = 3.0194e-02, PNorm = 154.9182, GNorm = 0.3114, lr_0 = 5.8270e-04
Loss = 3.9579e-02, PNorm = 154.9567, GNorm = 0.8253, lr_0 = 5.8230e-04
Loss = 4.2891e-02, PNorm = 154.9990, GNorm = 0.8721, lr_0 = 5.8190e-04
Loss = 4.3715e-02, PNorm = 155.0446, GNorm = 0.4327, lr_0 = 5.8151e-04
Loss = 3.9094e-02, PNorm = 155.0882, GNorm = 0.4006, lr_0 = 5.8111e-04
Loss = 3.7721e-02, PNorm = 155.1349, GNorm = 0.4387, lr_0 = 5.8071e-04
Loss = 4.7936e-02, PNorm = 155.1843, GNorm = 0.3372, lr_0 = 5.8031e-04
Loss = 4.4898e-02, PNorm = 155.2287, GNorm = 0.2839, lr_0 = 5.7991e-04
Loss = 4.6389e-02, PNorm = 155.2786, GNorm = 0.5361, lr_0 = 5.7952e-04
Loss = 5.6408e-02, PNorm = 155.3261, GNorm = 0.4778, lr_0 = 5.7912e-04
Loss = 4.1417e-02, PNorm = 155.3754, GNorm = 0.7415, lr_0 = 5.7872e-04
Loss = 4.1618e-02, PNorm = 155.4275, GNorm = 0.6591, lr_0 = 5.7833e-04
Loss = 3.5590e-02, PNorm = 155.4708, GNorm = 0.3431, lr_0 = 5.7793e-04
Loss = 3.9177e-02, PNorm = 155.5151, GNorm = 1.5015, lr_0 = 5.7753e-04
Loss = 3.5187e-02, PNorm = 155.5521, GNorm = 1.2812, lr_0 = 5.7714e-04
Loss = 4.1480e-02, PNorm = 155.5961, GNorm = 0.3280, lr_0 = 5.7674e-04
Loss = 3.8987e-02, PNorm = 155.6428, GNorm = 0.5214, lr_0 = 5.7635e-04
Loss = 5.0681e-02, PNorm = 155.6943, GNorm = 0.5779, lr_0 = 5.7595e-04
Loss = 4.1733e-02, PNorm = 155.7445, GNorm = 0.3696, lr_0 = 5.7556e-04
Loss = 4.0619e-02, PNorm = 155.7988, GNorm = 0.6324, lr_0 = 5.7516e-04
Loss = 4.2324e-02, PNorm = 155.8461, GNorm = 0.5083, lr_0 = 5.7477e-04
Loss = 4.1677e-02, PNorm = 155.8918, GNorm = 0.4813, lr_0 = 5.7438e-04
Loss = 4.7336e-02, PNorm = 155.9420, GNorm = 0.7607, lr_0 = 5.7398e-04
Loss = 4.1373e-02, PNorm = 155.9932, GNorm = 0.4173, lr_0 = 5.7359e-04
Loss = 4.0464e-02, PNorm = 156.0409, GNorm = 0.3792, lr_0 = 5.7320e-04
Loss = 4.7512e-02, PNorm = 156.0929, GNorm = 0.3375, lr_0 = 5.7280e-04
Loss = 3.5518e-02, PNorm = 156.1382, GNorm = 0.7540, lr_0 = 5.7241e-04
Loss = 4.2413e-02, PNorm = 156.1862, GNorm = 0.3633, lr_0 = 5.7202e-04
Loss = 3.2519e-02, PNorm = 156.2339, GNorm = 0.3062, lr_0 = 5.7163e-04
Loss = 4.8269e-02, PNorm = 156.2785, GNorm = 0.5886, lr_0 = 5.7124e-04
Loss = 3.9704e-02, PNorm = 156.3282, GNorm = 0.6609, lr_0 = 5.7084e-04
Loss = 4.1058e-02, PNorm = 156.3829, GNorm = 0.7709, lr_0 = 5.7045e-04
Loss = 3.6867e-02, PNorm = 156.4338, GNorm = 0.3238, lr_0 = 5.7006e-04
Loss = 4.6395e-02, PNorm = 156.4842, GNorm = 0.3760, lr_0 = 5.6967e-04
Loss = 3.8087e-02, PNorm = 156.5304, GNorm = 0.7463, lr_0 = 5.6928e-04
Loss = 3.7040e-02, PNorm = 156.5806, GNorm = 0.3467, lr_0 = 5.6889e-04
Loss = 4.5891e-02, PNorm = 156.6283, GNorm = 0.3910, lr_0 = 5.6850e-04
Loss = 6.5751e-02, PNorm = 156.6834, GNorm = 0.5970, lr_0 = 5.6811e-04
Loss = 4.0287e-02, PNorm = 156.7440, GNorm = 0.4464, lr_0 = 5.6772e-04
Loss = 4.7424e-02, PNorm = 156.8043, GNorm = 0.4040, lr_0 = 5.6733e-04
Loss = 4.7214e-02, PNorm = 156.8557, GNorm = 0.5371, lr_0 = 5.6695e-04
Loss = 4.1747e-02, PNorm = 156.9085, GNorm = 0.3545, lr_0 = 5.6656e-04
Loss = 4.4220e-02, PNorm = 156.9612, GNorm = 0.4719, lr_0 = 5.6617e-04
Loss = 4.3396e-02, PNorm = 157.0099, GNorm = 0.4860, lr_0 = 5.6578e-04
Loss = 4.8085e-02, PNorm = 157.0623, GNorm = 0.3032, lr_0 = 5.6539e-04
Loss = 3.8352e-02, PNorm = 157.1138, GNorm = 0.4476, lr_0 = 5.6501e-04
Loss = 4.4357e-02, PNorm = 157.1623, GNorm = 0.5118, lr_0 = 5.6462e-04
Loss = 3.5551e-02, PNorm = 157.2094, GNorm = 0.2909, lr_0 = 5.6423e-04
Loss = 4.3933e-02, PNorm = 157.2594, GNorm = 0.7406, lr_0 = 5.6385e-04
Loss = 4.0725e-02, PNorm = 157.3080, GNorm = 0.6108, lr_0 = 5.6346e-04
Loss = 4.0349e-02, PNorm = 157.3612, GNorm = 0.6307, lr_0 = 5.6307e-04
Loss = 5.7124e-02, PNorm = 157.4161, GNorm = 0.6995, lr_0 = 5.6269e-04
Loss = 3.9590e-02, PNorm = 157.4689, GNorm = 0.3620, lr_0 = 5.6230e-04
Validation mae = 0.123631
Epoch 9
Loss = 3.6067e-02, PNorm = 157.5132, GNorm = 0.4102, lr_0 = 5.6192e-04
Loss = 3.4467e-02, PNorm = 157.5546, GNorm = 0.3471, lr_0 = 5.6153e-04
Loss = 4.2291e-02, PNorm = 157.5898, GNorm = 0.3547, lr_0 = 5.6115e-04
Loss = 2.8587e-02, PNorm = 157.6215, GNorm = 0.2626, lr_0 = 5.6076e-04
Loss = 3.7725e-02, PNorm = 157.6534, GNorm = 0.4855, lr_0 = 5.6038e-04
Loss = 2.8273e-02, PNorm = 157.6879, GNorm = 0.3614, lr_0 = 5.6000e-04
Loss = 2.7545e-02, PNorm = 157.7223, GNorm = 0.4434, lr_0 = 5.5961e-04
Loss = 3.3841e-02, PNorm = 157.7538, GNorm = 0.4685, lr_0 = 5.5923e-04
Loss = 3.3077e-02, PNorm = 157.7845, GNorm = 0.3680, lr_0 = 5.5885e-04
Loss = 3.0306e-02, PNorm = 157.8131, GNorm = 0.3917, lr_0 = 5.5846e-04
Loss = 3.5081e-02, PNorm = 157.8406, GNorm = 0.2701, lr_0 = 5.5808e-04
Loss = 3.7904e-02, PNorm = 157.8735, GNorm = 0.2700, lr_0 = 5.5770e-04
Loss = 3.2283e-02, PNorm = 157.9085, GNorm = 0.4331, lr_0 = 5.5732e-04
Loss = 3.6329e-02, PNorm = 157.9472, GNorm = 0.2960, lr_0 = 5.5693e-04
Loss = 2.9659e-02, PNorm = 157.9822, GNorm = 0.3899, lr_0 = 5.5655e-04
Loss = 2.5540e-02, PNorm = 158.0087, GNorm = 0.2616, lr_0 = 5.5617e-04
Loss = 2.7058e-02, PNorm = 158.0345, GNorm = 0.2345, lr_0 = 5.5579e-04
Loss = 2.8082e-02, PNorm = 158.0666, GNorm = 0.7595, lr_0 = 5.5541e-04
Loss = 3.3039e-02, PNorm = 158.1001, GNorm = 0.3390, lr_0 = 5.5503e-04
Loss = 3.6571e-02, PNorm = 158.1298, GNorm = 0.3107, lr_0 = 5.5465e-04
Loss = 2.9999e-02, PNorm = 158.1641, GNorm = 0.4563, lr_0 = 5.5427e-04
Loss = 2.8828e-02, PNorm = 158.1963, GNorm = 0.2665, lr_0 = 5.5389e-04
Loss = 3.1856e-02, PNorm = 158.2271, GNorm = 0.2721, lr_0 = 5.5351e-04
Loss = 2.8988e-02, PNorm = 158.2575, GNorm = 0.3240, lr_0 = 5.5313e-04
Loss = 2.9945e-02, PNorm = 158.2848, GNorm = 0.4233, lr_0 = 5.5275e-04
Loss = 3.2301e-02, PNorm = 158.3151, GNorm = 0.4064, lr_0 = 5.5237e-04
Loss = 3.3121e-02, PNorm = 158.3498, GNorm = 0.2261, lr_0 = 5.5199e-04
Loss = 2.5939e-02, PNorm = 158.3851, GNorm = 0.4223, lr_0 = 5.5162e-04
Loss = 3.1595e-02, PNorm = 158.4160, GNorm = 0.3859, lr_0 = 5.5124e-04
Loss = 2.7373e-02, PNorm = 158.4514, GNorm = 0.3770, lr_0 = 5.5086e-04
Loss = 3.0259e-02, PNorm = 158.4835, GNorm = 0.2733, lr_0 = 5.5048e-04
Loss = 3.3136e-02, PNorm = 158.5162, GNorm = 0.3539, lr_0 = 5.5011e-04
Loss = 3.1282e-02, PNorm = 158.5510, GNorm = 0.4299, lr_0 = 5.4973e-04
Loss = 3.2240e-02, PNorm = 158.5862, GNorm = 0.5024, lr_0 = 5.4935e-04
Loss = 3.1839e-02, PNorm = 158.6210, GNorm = 0.5181, lr_0 = 5.4898e-04
Loss = 3.0510e-02, PNorm = 158.6540, GNorm = 0.2446, lr_0 = 5.4860e-04
Loss = 3.0534e-02, PNorm = 158.6907, GNorm = 0.7376, lr_0 = 5.4822e-04
Loss = 2.9233e-02, PNorm = 158.7283, GNorm = 0.2536, lr_0 = 5.4785e-04
Loss = 4.3249e-02, PNorm = 158.7644, GNorm = 0.2636, lr_0 = 5.4747e-04
Loss = 3.5521e-02, PNorm = 158.8022, GNorm = 0.5487, lr_0 = 5.4710e-04
Loss = 2.8193e-02, PNorm = 158.8374, GNorm = 0.3600, lr_0 = 5.4672e-04
Loss = 3.2797e-02, PNorm = 158.8745, GNorm = 0.7829, lr_0 = 5.4635e-04
Loss = 2.7300e-02, PNorm = 158.9131, GNorm = 0.4464, lr_0 = 5.4597e-04
Loss = 2.9607e-02, PNorm = 158.9496, GNorm = 0.3041, lr_0 = 5.4560e-04
Loss = 4.2246e-02, PNorm = 158.9886, GNorm = 0.5117, lr_0 = 5.4523e-04
Loss = 3.5385e-02, PNorm = 159.0262, GNorm = 0.3613, lr_0 = 5.4485e-04
Loss = 3.7084e-02, PNorm = 159.0638, GNorm = 0.3608, lr_0 = 5.4448e-04
Loss = 3.0704e-02, PNorm = 159.1024, GNorm = 0.4629, lr_0 = 5.4411e-04
Loss = 2.7318e-02, PNorm = 159.1386, GNorm = 0.5361, lr_0 = 5.4373e-04
Loss = 2.9204e-02, PNorm = 159.1744, GNorm = 0.5091, lr_0 = 5.4336e-04
Loss = 3.3839e-02, PNorm = 159.2122, GNorm = 0.4234, lr_0 = 5.4299e-04
Loss = 3.4627e-02, PNorm = 159.2502, GNorm = 0.4510, lr_0 = 5.4262e-04
Loss = 3.5947e-02, PNorm = 159.2877, GNorm = 0.3215, lr_0 = 5.4225e-04
Loss = 3.1599e-02, PNorm = 159.3211, GNorm = 0.2318, lr_0 = 5.4187e-04
Loss = 3.2736e-02, PNorm = 159.3578, GNorm = 0.3929, lr_0 = 5.4150e-04
Loss = 3.0125e-02, PNorm = 159.3954, GNorm = 0.4690, lr_0 = 5.4113e-04
Loss = 3.2444e-02, PNorm = 159.4334, GNorm = 0.4054, lr_0 = 5.4076e-04
Loss = 3.4609e-02, PNorm = 159.4698, GNorm = 0.4822, lr_0 = 5.4039e-04
Loss = 3.0190e-02, PNorm = 159.5099, GNorm = 0.4184, lr_0 = 5.4002e-04
Loss = 2.8416e-02, PNorm = 159.5412, GNorm = 0.3743, lr_0 = 5.3965e-04
Loss = 2.8286e-02, PNorm = 159.5747, GNorm = 0.4674, lr_0 = 5.3928e-04
Loss = 3.5207e-02, PNorm = 159.6108, GNorm = 0.5059, lr_0 = 5.3891e-04
Loss = 3.1639e-02, PNorm = 159.6465, GNorm = 0.3451, lr_0 = 5.3854e-04
Loss = 3.2463e-02, PNorm = 159.6848, GNorm = 0.3344, lr_0 = 5.3817e-04
Loss = 2.7839e-02, PNorm = 159.7234, GNorm = 0.2609, lr_0 = 5.3781e-04
Loss = 3.3498e-02, PNorm = 159.7642, GNorm = 0.4958, lr_0 = 5.3744e-04
Loss = 2.7676e-02, PNorm = 159.8018, GNorm = 0.4771, lr_0 = 5.3707e-04
Loss = 3.7517e-02, PNorm = 159.8390, GNorm = 0.3727, lr_0 = 5.3670e-04
Loss = 3.0059e-02, PNorm = 159.8733, GNorm = 0.4943, lr_0 = 5.3633e-04
Loss = 2.5733e-02, PNorm = 159.9029, GNorm = 0.3163, lr_0 = 5.3597e-04
Loss = 3.0590e-02, PNorm = 159.9430, GNorm = 0.2248, lr_0 = 5.3560e-04
Loss = 4.4490e-02, PNorm = 159.9816, GNorm = 0.6968, lr_0 = 5.3523e-04
Loss = 3.9489e-02, PNorm = 160.0201, GNorm = 0.5407, lr_0 = 5.3486e-04
Loss = 2.9173e-02, PNorm = 160.0615, GNorm = 0.4363, lr_0 = 5.3450e-04
Loss = 3.1428e-02, PNorm = 160.0995, GNorm = 0.2646, lr_0 = 5.3413e-04
Loss = 3.5238e-02, PNorm = 160.1371, GNorm = 0.3097, lr_0 = 5.3377e-04
Loss = 3.9975e-02, PNorm = 160.1767, GNorm = 0.7398, lr_0 = 5.3340e-04
Loss = 2.4790e-02, PNorm = 160.2146, GNorm = 0.3087, lr_0 = 5.3304e-04
Loss = 3.2040e-02, PNorm = 160.2507, GNorm = 0.5748, lr_0 = 5.3267e-04
Loss = 3.5105e-02, PNorm = 160.2905, GNorm = 0.3554, lr_0 = 5.3231e-04
Loss = 2.7752e-02, PNorm = 160.3289, GNorm = 0.3470, lr_0 = 5.3194e-04
Loss = 3.4491e-02, PNorm = 160.3668, GNorm = 0.3453, lr_0 = 5.3158e-04
Loss = 3.3267e-02, PNorm = 160.4032, GNorm = 0.4693, lr_0 = 5.3121e-04
Loss = 2.5577e-02, PNorm = 160.4395, GNorm = 0.3936, lr_0 = 5.3085e-04
Loss = 3.4713e-02, PNorm = 160.4797, GNorm = 0.5063, lr_0 = 5.3048e-04
Loss = 3.2986e-02, PNorm = 160.5190, GNorm = 0.2993, lr_0 = 5.3012e-04
Loss = 3.5768e-02, PNorm = 160.5602, GNorm = 0.3367, lr_0 = 5.2976e-04
Loss = 3.2291e-02, PNorm = 160.6040, GNorm = 0.2886, lr_0 = 5.2939e-04
Loss = 3.1121e-02, PNorm = 160.6484, GNorm = 0.6817, lr_0 = 5.2903e-04
Loss = 3.2170e-02, PNorm = 160.6840, GNorm = 0.4153, lr_0 = 5.2867e-04
Loss = 4.4602e-02, PNorm = 160.7245, GNorm = 0.3383, lr_0 = 5.2831e-04
Loss = 2.7608e-02, PNorm = 160.7608, GNorm = 0.3268, lr_0 = 5.2795e-04
Loss = 3.0715e-02, PNorm = 160.8037, GNorm = 0.5190, lr_0 = 5.2758e-04
Loss = 3.6438e-02, PNorm = 160.8444, GNorm = 0.3357, lr_0 = 5.2722e-04
Loss = 4.1846e-02, PNorm = 160.8780, GNorm = 0.9465, lr_0 = 5.2686e-04
Loss = 2.7537e-02, PNorm = 160.9206, GNorm = 0.4635, lr_0 = 5.2650e-04
Loss = 4.7060e-02, PNorm = 160.9627, GNorm = 0.5830, lr_0 = 5.2614e-04
Loss = 2.9456e-02, PNorm = 161.0122, GNorm = 0.2693, lr_0 = 5.2578e-04
Loss = 3.0013e-02, PNorm = 161.0537, GNorm = 0.2165, lr_0 = 5.2542e-04
Loss = 3.8299e-02, PNorm = 161.0953, GNorm = 0.2644, lr_0 = 5.2506e-04
Loss = 3.0857e-02, PNorm = 161.1383, GNorm = 0.3803, lr_0 = 5.2470e-04
Loss = 2.9438e-02, PNorm = 161.1816, GNorm = 0.3235, lr_0 = 5.2434e-04
Loss = 4.2743e-02, PNorm = 161.2208, GNorm = 0.5032, lr_0 = 5.2398e-04
Loss = 3.4153e-02, PNorm = 161.2618, GNorm = 0.2717, lr_0 = 5.2362e-04
Loss = 3.0593e-02, PNorm = 161.3033, GNorm = 0.5457, lr_0 = 5.2326e-04
Loss = 2.8706e-02, PNorm = 161.3390, GNorm = 0.3349, lr_0 = 5.2290e-04
Loss = 3.8934e-02, PNorm = 161.3755, GNorm = 0.5625, lr_0 = 5.2255e-04
Loss = 3.2180e-02, PNorm = 161.4177, GNorm = 0.2778, lr_0 = 5.2219e-04
Loss = 3.2651e-02, PNorm = 161.4564, GNorm = 0.6981, lr_0 = 5.2183e-04
Loss = 4.2495e-02, PNorm = 161.4893, GNorm = 0.3297, lr_0 = 5.2147e-04
Loss = 3.3846e-02, PNorm = 161.5301, GNorm = 0.3412, lr_0 = 5.2112e-04
Loss = 3.4925e-02, PNorm = 161.5747, GNorm = 0.3268, lr_0 = 5.2076e-04
Loss = 3.7690e-02, PNorm = 161.6153, GNorm = 0.5421, lr_0 = 5.2040e-04
Loss = 3.4671e-02, PNorm = 161.6582, GNorm = 0.4802, lr_0 = 5.2005e-04
Loss = 3.2543e-02, PNorm = 161.7004, GNorm = 0.4580, lr_0 = 5.1969e-04
Loss = 3.5433e-02, PNorm = 161.7437, GNorm = 0.5018, lr_0 = 5.1933e-04
Loss = 3.1769e-02, PNorm = 161.7889, GNorm = 0.6519, lr_0 = 5.1898e-04
Loss = 5.0867e-02, PNorm = 161.8407, GNorm = 0.2685, lr_0 = 5.1862e-04
Loss = 3.6026e-02, PNorm = 161.8882, GNorm = 0.4131, lr_0 = 5.1827e-04
Loss = 3.2955e-02, PNorm = 161.9387, GNorm = 0.4503, lr_0 = 5.1791e-04
Validation mae = 0.122903
Epoch 10
Loss = 2.6843e-02, PNorm = 161.9769, GNorm = 0.6409, lr_0 = 5.1756e-04
Loss = 3.7080e-02, PNorm = 162.0048, GNorm = 0.2995, lr_0 = 5.1720e-04
Loss = 2.5809e-02, PNorm = 162.0310, GNorm = 0.3319, lr_0 = 5.1685e-04
Loss = 2.9346e-02, PNorm = 162.0574, GNorm = 0.2692, lr_0 = 5.1649e-04
Loss = 3.4504e-02, PNorm = 162.0818, GNorm = 0.4532, lr_0 = 5.1614e-04
Loss = 2.5814e-02, PNorm = 162.1093, GNorm = 0.3470, lr_0 = 5.1579e-04
Loss = 3.5685e-02, PNorm = 162.1343, GNorm = 0.2600, lr_0 = 5.1543e-04
Loss = 2.7829e-02, PNorm = 162.1625, GNorm = 0.4965, lr_0 = 5.1508e-04
Loss = 3.3821e-02, PNorm = 162.1879, GNorm = 0.4958, lr_0 = 5.1473e-04
Loss = 3.5247e-02, PNorm = 162.2164, GNorm = 1.1450, lr_0 = 5.1437e-04
Loss = 2.4115e-02, PNorm = 162.2463, GNorm = 0.4883, lr_0 = 5.1402e-04
Loss = 3.4100e-02, PNorm = 162.2691, GNorm = 0.5853, lr_0 = 5.1367e-04
Loss = 2.7641e-02, PNorm = 162.2969, GNorm = 0.2541, lr_0 = 5.1332e-04
Loss = 2.8708e-02, PNorm = 162.3234, GNorm = 0.3428, lr_0 = 5.1297e-04
Loss = 2.4657e-02, PNorm = 162.3517, GNorm = 0.2394, lr_0 = 5.1262e-04
Loss = 3.0447e-02, PNorm = 162.3824, GNorm = 0.2978, lr_0 = 5.1226e-04
Loss = 2.4603e-02, PNorm = 162.4063, GNorm = 0.1716, lr_0 = 5.1191e-04
Loss = 2.3670e-02, PNorm = 162.4293, GNorm = 0.2396, lr_0 = 5.1156e-04
Loss = 2.6688e-02, PNorm = 162.4529, GNorm = 0.5293, lr_0 = 5.1121e-04
Loss = 2.4939e-02, PNorm = 162.4847, GNorm = 0.6500, lr_0 = 5.1086e-04
Loss = 2.1978e-02, PNorm = 162.5113, GNorm = 0.3812, lr_0 = 5.1051e-04
Loss = 2.4791e-02, PNorm = 162.5413, GNorm = 0.2159, lr_0 = 5.1016e-04
Loss = 2.4670e-02, PNorm = 162.5713, GNorm = 0.2724, lr_0 = 5.0981e-04
Loss = 2.6073e-02, PNorm = 162.6011, GNorm = 0.5409, lr_0 = 5.0946e-04
Loss = 2.6759e-02, PNorm = 162.6304, GNorm = 0.4135, lr_0 = 5.0911e-04
Loss = 3.0566e-02, PNorm = 162.6614, GNorm = 0.3393, lr_0 = 5.0877e-04
Loss = 2.2659e-02, PNorm = 162.6934, GNorm = 0.4944, lr_0 = 5.0842e-04
Loss = 2.4450e-02, PNorm = 162.7188, GNorm = 0.2833, lr_0 = 5.0807e-04
Loss = 2.4314e-02, PNorm = 162.7442, GNorm = 0.6094, lr_0 = 5.0772e-04
Loss = 2.6929e-02, PNorm = 162.7746, GNorm = 0.4899, lr_0 = 5.0737e-04
Loss = 2.5384e-02, PNorm = 162.8059, GNorm = 0.3430, lr_0 = 5.0703e-04
Loss = 3.7150e-02, PNorm = 162.8339, GNorm = 0.3225, lr_0 = 5.0668e-04
Loss = 2.6062e-02, PNorm = 162.8625, GNorm = 0.3315, lr_0 = 5.0633e-04
Loss = 2.8717e-02, PNorm = 162.8902, GNorm = 0.2843, lr_0 = 5.0598e-04
Loss = 2.4046e-02, PNorm = 162.9253, GNorm = 0.2584, lr_0 = 5.0564e-04
Loss = 2.5967e-02, PNorm = 162.9577, GNorm = 0.3579, lr_0 = 5.0529e-04
Loss = 2.5673e-02, PNorm = 162.9857, GNorm = 0.2987, lr_0 = 5.0494e-04
Loss = 2.2087e-02, PNorm = 163.0124, GNorm = 0.2050, lr_0 = 5.0460e-04
Loss = 2.5935e-02, PNorm = 163.0391, GNorm = 0.4761, lr_0 = 5.0425e-04
Loss = 3.3140e-02, PNorm = 163.0699, GNorm = 0.3943, lr_0 = 5.0391e-04
Loss = 2.4933e-02, PNorm = 163.1022, GNorm = 0.3572, lr_0 = 5.0356e-04
Loss = 2.7716e-02, PNorm = 163.1350, GNorm = 0.4053, lr_0 = 5.0322e-04
Loss = 2.4990e-02, PNorm = 163.1629, GNorm = 0.2955, lr_0 = 5.0287e-04
Loss = 2.5181e-02, PNorm = 163.1963, GNorm = 0.5816, lr_0 = 5.0253e-04
Loss = 3.2585e-02, PNorm = 163.2204, GNorm = 0.3413, lr_0 = 5.0218e-04
Loss = 3.0758e-02, PNorm = 163.2478, GNorm = 0.3004, lr_0 = 5.0184e-04
Loss = 2.4520e-02, PNorm = 163.2753, GNorm = 0.2850, lr_0 = 5.0150e-04
Loss = 2.9792e-02, PNorm = 163.3108, GNorm = 0.3451, lr_0 = 5.0115e-04
Loss = 2.4088e-02, PNorm = 163.3442, GNorm = 0.2747, lr_0 = 5.0081e-04
Loss = 2.5203e-02, PNorm = 163.3744, GNorm = 0.4989, lr_0 = 5.0047e-04
Loss = 2.9919e-02, PNorm = 163.4078, GNorm = 0.3214, lr_0 = 5.0012e-04
Loss = 2.5759e-02, PNorm = 163.4397, GNorm = 0.4304, lr_0 = 4.9978e-04
Loss = 3.0582e-02, PNorm = 163.4717, GNorm = 0.3571, lr_0 = 4.9944e-04
Loss = 2.7840e-02, PNorm = 163.5054, GNorm = 0.7858, lr_0 = 4.9910e-04
Loss = 2.7142e-02, PNorm = 163.5418, GNorm = 0.4706, lr_0 = 4.9875e-04
Loss = 2.5668e-02, PNorm = 163.5752, GNorm = 0.4798, lr_0 = 4.9841e-04
Loss = 2.2743e-02, PNorm = 163.6055, GNorm = 0.3329, lr_0 = 4.9807e-04
Loss = 2.7459e-02, PNorm = 163.6385, GNorm = 0.2593, lr_0 = 4.9773e-04
Loss = 2.9035e-02, PNorm = 163.6750, GNorm = 0.8102, lr_0 = 4.9739e-04
Loss = 2.7630e-02, PNorm = 163.7102, GNorm = 0.3662, lr_0 = 4.9705e-04
Loss = 2.4042e-02, PNorm = 163.7469, GNorm = 0.2273, lr_0 = 4.9671e-04
Loss = 2.4056e-02, PNorm = 163.7778, GNorm = 0.2615, lr_0 = 4.9637e-04
Loss = 3.1233e-02, PNorm = 163.8106, GNorm = 0.4865, lr_0 = 4.9603e-04
Loss = 3.6101e-02, PNorm = 163.8456, GNorm = 0.4519, lr_0 = 4.9569e-04
Loss = 2.6792e-02, PNorm = 163.8804, GNorm = 0.4312, lr_0 = 4.9535e-04
Loss = 2.3628e-02, PNorm = 163.9148, GNorm = 0.4458, lr_0 = 4.9501e-04
Loss = 2.6029e-02, PNorm = 163.9475, GNorm = 0.2590, lr_0 = 4.9467e-04
Loss = 3.3975e-02, PNorm = 163.9802, GNorm = 0.7553, lr_0 = 4.9433e-04
Loss = 2.4510e-02, PNorm = 164.0105, GNorm = 0.6097, lr_0 = 4.9399e-04
Loss = 2.2329e-02, PNorm = 164.0447, GNorm = 0.1983, lr_0 = 4.9365e-04
Loss = 2.6857e-02, PNorm = 164.0772, GNorm = 0.2421, lr_0 = 4.9332e-04
Loss = 2.8044e-02, PNorm = 164.1170, GNorm = 0.3136, lr_0 = 4.9298e-04
Loss = 2.7429e-02, PNorm = 164.1561, GNorm = 0.4799, lr_0 = 4.9264e-04
Loss = 2.5319e-02, PNorm = 164.1894, GNorm = 0.3098, lr_0 = 4.9230e-04
Loss = 2.6397e-02, PNorm = 164.2191, GNorm = 0.3565, lr_0 = 4.9197e-04
Loss = 3.3306e-02, PNorm = 164.2528, GNorm = 0.4848, lr_0 = 4.9163e-04
Loss = 3.0366e-02, PNorm = 164.2854, GNorm = 0.2823, lr_0 = 4.9129e-04
Loss = 2.3461e-02, PNorm = 164.3206, GNorm = 0.2171, lr_0 = 4.9095e-04
Loss = 2.3426e-02, PNorm = 164.3542, GNorm = 0.3238, lr_0 = 4.9062e-04
Loss = 2.9806e-02, PNorm = 164.3847, GNorm = 0.4542, lr_0 = 4.9028e-04
Loss = 2.5482e-02, PNorm = 164.4136, GNorm = 0.5340, lr_0 = 4.8995e-04
Loss = 2.9429e-02, PNorm = 164.4465, GNorm = 0.3434, lr_0 = 4.8961e-04
Loss = 2.6403e-02, PNorm = 164.4773, GNorm = 0.6578, lr_0 = 4.8928e-04
Loss = 2.3354e-02, PNorm = 164.5093, GNorm = 0.4939, lr_0 = 4.8894e-04
Loss = 4.3705e-02, PNorm = 164.5488, GNorm = 0.2691, lr_0 = 4.8861e-04
Loss = 2.8921e-02, PNorm = 164.5845, GNorm = 1.5551, lr_0 = 4.8827e-04
Loss = 2.3206e-02, PNorm = 164.6186, GNorm = 0.5305, lr_0 = 4.8794e-04
Loss = 3.3267e-02, PNorm = 164.6556, GNorm = 0.4045, lr_0 = 4.8760e-04
Loss = 2.5078e-02, PNorm = 164.6879, GNorm = 0.3165, lr_0 = 4.8727e-04
Loss = 2.6899e-02, PNorm = 164.7242, GNorm = 0.3018, lr_0 = 4.8693e-04
Loss = 2.6905e-02, PNorm = 164.7564, GNorm = 0.2933, lr_0 = 4.8660e-04
Loss = 2.2428e-02, PNorm = 164.7887, GNorm = 0.3536, lr_0 = 4.8627e-04
Loss = 2.1949e-02, PNorm = 164.8158, GNorm = 0.2731, lr_0 = 4.8593e-04
Loss = 3.0609e-02, PNorm = 164.8418, GNorm = 0.2621, lr_0 = 4.8560e-04
Loss = 2.5241e-02, PNorm = 164.8708, GNorm = 0.2965, lr_0 = 4.8527e-04
Loss = 2.2394e-02, PNorm = 164.9021, GNorm = 0.2433, lr_0 = 4.8494e-04
Loss = 2.3921e-02, PNorm = 164.9347, GNorm = 0.2804, lr_0 = 4.8460e-04
Loss = 2.5731e-02, PNorm = 164.9699, GNorm = 0.2646, lr_0 = 4.8427e-04
Loss = 2.4038e-02, PNorm = 165.0057, GNorm = 0.4463, lr_0 = 4.8394e-04
Loss = 3.4278e-02, PNorm = 165.0414, GNorm = 0.3036, lr_0 = 4.8361e-04
Loss = 2.6543e-02, PNorm = 165.0753, GNorm = 0.3129, lr_0 = 4.8328e-04
Loss = 2.7205e-02, PNorm = 165.1128, GNorm = 0.7340, lr_0 = 4.8295e-04
Loss = 2.5081e-02, PNorm = 165.1527, GNorm = 0.3656, lr_0 = 4.8262e-04
Loss = 3.4978e-02, PNorm = 165.1882, GNorm = 0.2660, lr_0 = 4.8228e-04
Loss = 3.0440e-02, PNorm = 165.2235, GNorm = 0.6183, lr_0 = 4.8195e-04
Loss = 2.6107e-02, PNorm = 165.2632, GNorm = 0.3926, lr_0 = 4.8162e-04
Loss = 2.7962e-02, PNorm = 165.3005, GNorm = 0.2891, lr_0 = 4.8129e-04
Loss = 2.7297e-02, PNorm = 165.3361, GNorm = 0.5236, lr_0 = 4.8096e-04
Loss = 2.6637e-02, PNorm = 165.3688, GNorm = 0.3824, lr_0 = 4.8064e-04
Loss = 2.5573e-02, PNorm = 165.4052, GNorm = 0.3336, lr_0 = 4.8031e-04
Loss = 2.3479e-02, PNorm = 165.4421, GNorm = 0.5024, lr_0 = 4.7998e-04
Loss = 2.5313e-02, PNorm = 165.4808, GNorm = 0.5678, lr_0 = 4.7965e-04
Loss = 2.8033e-02, PNorm = 165.5217, GNorm = 0.6732, lr_0 = 4.7932e-04
Loss = 3.1107e-02, PNorm = 165.5577, GNorm = 0.4501, lr_0 = 4.7899e-04
Loss = 2.2714e-02, PNorm = 165.5947, GNorm = 0.3445, lr_0 = 4.7866e-04
Loss = 1.9620e-02, PNorm = 165.6287, GNorm = 0.4318, lr_0 = 4.7833e-04
Loss = 2.7111e-02, PNorm = 165.6603, GNorm = 0.2609, lr_0 = 4.7801e-04
Loss = 3.3325e-02, PNorm = 165.6921, GNorm = 0.5939, lr_0 = 4.7768e-04
Loss = 2.9809e-02, PNorm = 165.7277, GNorm = 0.8286, lr_0 = 4.7735e-04
Loss = 2.5363e-02, PNorm = 165.7655, GNorm = 0.5636, lr_0 = 4.7703e-04
Validation mae = 0.122636
Epoch 11
Loss = 2.3413e-02, PNorm = 165.7949, GNorm = 0.5192, lr_0 = 4.7670e-04
Loss = 3.2744e-02, PNorm = 165.8227, GNorm = 0.3066, lr_0 = 4.7637e-04
Loss = 2.3262e-02, PNorm = 165.8536, GNorm = 0.8058, lr_0 = 4.7605e-04
Loss = 3.2493e-02, PNorm = 165.8840, GNorm = 0.3728, lr_0 = 4.7572e-04
Loss = 2.0086e-02, PNorm = 165.9120, GNorm = 0.7220, lr_0 = 4.7539e-04
Loss = 2.7879e-02, PNorm = 165.9330, GNorm = 0.5312, lr_0 = 4.7507e-04
Loss = 2.0358e-02, PNorm = 165.9583, GNorm = 0.7067, lr_0 = 4.7474e-04
Loss = 2.6009e-02, PNorm = 165.9846, GNorm = 0.4530, lr_0 = 4.7442e-04
Loss = 2.5313e-02, PNorm = 166.0107, GNorm = 0.3715, lr_0 = 4.7409e-04
Loss = 3.1335e-02, PNorm = 166.0349, GNorm = 0.3324, lr_0 = 4.7377e-04
Loss = 2.3745e-02, PNorm = 166.0559, GNorm = 0.3211, lr_0 = 4.7344e-04
Loss = 2.5601e-02, PNorm = 166.0824, GNorm = 0.3340, lr_0 = 4.7312e-04
Loss = 2.0572e-02, PNorm = 166.1030, GNorm = 0.4275, lr_0 = 4.7279e-04
Loss = 2.9772e-02, PNorm = 166.1274, GNorm = 0.6114, lr_0 = 4.7247e-04
Loss = 2.8768e-02, PNorm = 166.1511, GNorm = 0.3533, lr_0 = 4.7215e-04
Loss = 1.9828e-02, PNorm = 166.1759, GNorm = 0.2518, lr_0 = 4.7182e-04
Loss = 1.7605e-02, PNorm = 166.2000, GNorm = 0.3958, lr_0 = 4.7150e-04
Loss = 2.3226e-02, PNorm = 166.2261, GNorm = 1.0327, lr_0 = 4.7118e-04
Loss = 1.9270e-02, PNorm = 166.2464, GNorm = 0.3268, lr_0 = 4.7085e-04
Loss = 2.7277e-02, PNorm = 166.2702, GNorm = 0.4832, lr_0 = 4.7053e-04
Loss = 2.0626e-02, PNorm = 166.2972, GNorm = 0.2743, lr_0 = 4.7021e-04
Loss = 2.4933e-02, PNorm = 166.3214, GNorm = 0.5888, lr_0 = 4.6989e-04
Loss = 2.1610e-02, PNorm = 166.3473, GNorm = 0.3253, lr_0 = 4.6957e-04
Loss = 1.7441e-02, PNorm = 166.3707, GNorm = 0.3358, lr_0 = 4.6924e-04
Loss = 2.3267e-02, PNorm = 166.3902, GNorm = 0.5482, lr_0 = 4.6892e-04
Loss = 1.9910e-02, PNorm = 166.4098, GNorm = 0.2420, lr_0 = 4.6860e-04
Loss = 2.2790e-02, PNorm = 166.4316, GNorm = 0.2855, lr_0 = 4.6828e-04
Loss = 2.2256e-02, PNorm = 166.4550, GNorm = 0.3439, lr_0 = 4.6796e-04
Loss = 2.0330e-02, PNorm = 166.4797, GNorm = 0.3430, lr_0 = 4.6764e-04
Loss = 2.6975e-02, PNorm = 166.5092, GNorm = 0.3844, lr_0 = 4.6732e-04
Loss = 1.9286e-02, PNorm = 166.5369, GNorm = 0.2989, lr_0 = 4.6700e-04
Loss = 2.0876e-02, PNorm = 166.5614, GNorm = 0.4303, lr_0 = 4.6668e-04
Loss = 2.2353e-02, PNorm = 166.5864, GNorm = 0.4996, lr_0 = 4.6636e-04
Loss = 1.6059e-02, PNorm = 166.6054, GNorm = 0.2294, lr_0 = 4.6604e-04
Loss = 1.9627e-02, PNorm = 166.6299, GNorm = 0.1964, lr_0 = 4.6572e-04
Loss = 2.4906e-02, PNorm = 166.6575, GNorm = 0.3832, lr_0 = 4.6540e-04
Loss = 2.0064e-02, PNorm = 166.6840, GNorm = 0.3580, lr_0 = 4.6508e-04
Loss = 2.1500e-02, PNorm = 166.7086, GNorm = 0.4438, lr_0 = 4.6476e-04
Loss = 2.2084e-02, PNorm = 166.7318, GNorm = 0.1838, lr_0 = 4.6445e-04
Loss = 2.6246e-02, PNorm = 166.7585, GNorm = 0.3251, lr_0 = 4.6413e-04
Loss = 2.1793e-02, PNorm = 166.7801, GNorm = 0.6650, lr_0 = 4.6381e-04
Loss = 1.8389e-02, PNorm = 166.8039, GNorm = 0.5642, lr_0 = 4.6349e-04
Loss = 1.8705e-02, PNorm = 166.8282, GNorm = 0.2684, lr_0 = 4.6317e-04
Loss = 3.2440e-02, PNorm = 166.8482, GNorm = 0.2451, lr_0 = 4.6286e-04
Loss = 1.8659e-02, PNorm = 166.8766, GNorm = 0.4935, lr_0 = 4.6254e-04
Loss = 2.0286e-02, PNorm = 166.8987, GNorm = 0.2179, lr_0 = 4.6222e-04
Loss = 1.9714e-02, PNorm = 166.9213, GNorm = 0.2547, lr_0 = 4.6191e-04
Loss = 1.9498e-02, PNorm = 166.9495, GNorm = 0.3386, lr_0 = 4.6159e-04
Loss = 2.5428e-02, PNorm = 166.9740, GNorm = 0.2419, lr_0 = 4.6127e-04
Loss = 2.1015e-02, PNorm = 167.0021, GNorm = 0.3311, lr_0 = 4.6096e-04
Loss = 1.8642e-02, PNorm = 167.0274, GNorm = 0.2299, lr_0 = 4.6064e-04
Loss = 2.1686e-02, PNorm = 167.0505, GNorm = 0.2122, lr_0 = 4.6033e-04
Loss = 2.0380e-02, PNorm = 167.0762, GNorm = 0.5281, lr_0 = 4.6001e-04
Loss = 2.1633e-02, PNorm = 167.1050, GNorm = 0.3492, lr_0 = 4.5970e-04
Loss = 2.1990e-02, PNorm = 167.1338, GNorm = 0.2464, lr_0 = 4.5938e-04
Loss = 2.2225e-02, PNorm = 167.1605, GNorm = 0.1860, lr_0 = 4.5907e-04
Loss = 2.2129e-02, PNorm = 167.1895, GNorm = 0.2225, lr_0 = 4.5875e-04
Loss = 2.2610e-02, PNorm = 167.2163, GNorm = 0.3217, lr_0 = 4.5844e-04
Loss = 1.9014e-02, PNorm = 167.2419, GNorm = 0.2648, lr_0 = 4.5812e-04
Loss = 1.6210e-02, PNorm = 167.2696, GNorm = 0.4153, lr_0 = 4.5781e-04
Loss = 2.0753e-02, PNorm = 167.2950, GNorm = 0.2087, lr_0 = 4.5750e-04
Loss = 2.2052e-02, PNorm = 167.3208, GNorm = 0.2627, lr_0 = 4.5718e-04
Loss = 2.2962e-02, PNorm = 167.3495, GNorm = 0.4731, lr_0 = 4.5687e-04
Loss = 2.0251e-02, PNorm = 167.3762, GNorm = 0.2350, lr_0 = 4.5656e-04
Loss = 2.3582e-02, PNorm = 167.4053, GNorm = 0.1967, lr_0 = 4.5624e-04
Loss = 2.0948e-02, PNorm = 167.4341, GNorm = 0.3234, lr_0 = 4.5593e-04
Loss = 2.0783e-02, PNorm = 167.4595, GNorm = 0.2391, lr_0 = 4.5562e-04
Loss = 2.6369e-02, PNorm = 167.4855, GNorm = 0.2476, lr_0 = 4.5531e-04
Loss = 3.8522e-02, PNorm = 167.5144, GNorm = 0.3234, lr_0 = 4.5499e-04
Loss = 2.3237e-02, PNorm = 167.5417, GNorm = 0.9084, lr_0 = 4.5468e-04
Loss = 2.1403e-02, PNorm = 167.5701, GNorm = 0.4396, lr_0 = 4.5437e-04
Loss = 2.4262e-02, PNorm = 167.5959, GNorm = 0.3112, lr_0 = 4.5406e-04
Loss = 1.9046e-02, PNorm = 167.6237, GNorm = 0.2162, lr_0 = 4.5375e-04
Loss = 2.3013e-02, PNorm = 167.6510, GNorm = 0.4044, lr_0 = 4.5344e-04
Loss = 2.5775e-02, PNorm = 167.6751, GNorm = 0.4179, lr_0 = 4.5313e-04
Loss = 1.8232e-02, PNorm = 167.7030, GNorm = 0.3069, lr_0 = 4.5282e-04
Loss = 2.2576e-02, PNorm = 167.7278, GNorm = 0.2694, lr_0 = 4.5251e-04
Loss = 1.9372e-02, PNorm = 167.7560, GNorm = 0.3509, lr_0 = 4.5220e-04
Loss = 2.1710e-02, PNorm = 167.7805, GNorm = 0.5098, lr_0 = 4.5189e-04
Loss = 2.2170e-02, PNorm = 167.8066, GNorm = 0.1667, lr_0 = 4.5158e-04
Loss = 2.4765e-02, PNorm = 167.8351, GNorm = 0.2200, lr_0 = 4.5127e-04
Loss = 1.9480e-02, PNorm = 167.8631, GNorm = 0.3407, lr_0 = 4.5096e-04
Loss = 2.2762e-02, PNorm = 167.8898, GNorm = 0.3973, lr_0 = 4.5065e-04
Loss = 2.1328e-02, PNorm = 167.9186, GNorm = 0.1981, lr_0 = 4.5034e-04
Loss = 2.2361e-02, PNorm = 167.9496, GNorm = 0.3757, lr_0 = 4.5003e-04
Loss = 2.1743e-02, PNorm = 167.9788, GNorm = 0.2702, lr_0 = 4.4972e-04
Loss = 2.0299e-02, PNorm = 168.0072, GNorm = 0.2658, lr_0 = 4.4942e-04
Loss = 2.0190e-02, PNorm = 168.0345, GNorm = 0.2295, lr_0 = 4.4911e-04
Loss = 2.2360e-02, PNorm = 168.0614, GNorm = 0.4137, lr_0 = 4.4880e-04
Loss = 3.9146e-02, PNorm = 168.0919, GNorm = 0.7897, lr_0 = 4.4849e-04
Loss = 1.8880e-02, PNorm = 168.1227, GNorm = 0.2863, lr_0 = 4.4819e-04
Loss = 2.3676e-02, PNorm = 168.1523, GNorm = 0.7058, lr_0 = 4.4788e-04
Loss = 1.9182e-02, PNorm = 168.1780, GNorm = 0.2603, lr_0 = 4.4757e-04
Loss = 2.7434e-02, PNorm = 168.2087, GNorm = 0.3787, lr_0 = 4.4727e-04
Loss = 2.4423e-02, PNorm = 168.2427, GNorm = 0.2618, lr_0 = 4.4696e-04
Loss = 2.7568e-02, PNorm = 168.2708, GNorm = 0.3278, lr_0 = 4.4665e-04
Loss = 2.3499e-02, PNorm = 168.2971, GNorm = 0.2587, lr_0 = 4.4635e-04
Loss = 1.5104e-02, PNorm = 168.3213, GNorm = 0.2908, lr_0 = 4.4604e-04
Loss = 3.0176e-02, PNorm = 168.3434, GNorm = 0.3764, lr_0 = 4.4574e-04
Loss = 2.0352e-02, PNorm = 168.3709, GNorm = 0.5141, lr_0 = 4.4543e-04
Loss = 2.1548e-02, PNorm = 168.4009, GNorm = 0.3085, lr_0 = 4.4513e-04
Loss = 1.9899e-02, PNorm = 168.4343, GNorm = 0.2589, lr_0 = 4.4482e-04
Loss = 2.4741e-02, PNorm = 168.4664, GNorm = 0.2612, lr_0 = 4.4452e-04
Loss = 2.4094e-02, PNorm = 168.4942, GNorm = 0.3241, lr_0 = 4.4421e-04
Loss = 2.2124e-02, PNorm = 168.5200, GNorm = 0.4345, lr_0 = 4.4391e-04
Loss = 3.3678e-02, PNorm = 168.5450, GNorm = 0.4303, lr_0 = 4.4360e-04
Loss = 2.7239e-02, PNorm = 168.5751, GNorm = 0.2049, lr_0 = 4.4330e-04
Loss = 2.2252e-02, PNorm = 168.6056, GNorm = 0.3378, lr_0 = 4.4299e-04
Loss = 2.2992e-02, PNorm = 168.6368, GNorm = 0.4660, lr_0 = 4.4269e-04
Loss = 2.3404e-02, PNorm = 168.6676, GNorm = 0.2652, lr_0 = 4.4239e-04
Loss = 2.0579e-02, PNorm = 168.6995, GNorm = 0.3310, lr_0 = 4.4209e-04
Loss = 2.6511e-02, PNorm = 168.7310, GNorm = 0.2474, lr_0 = 4.4178e-04
Loss = 2.4925e-02, PNorm = 168.7599, GNorm = 0.6620, lr_0 = 4.4148e-04
Loss = 2.2264e-02, PNorm = 168.7867, GNorm = 0.2688, lr_0 = 4.4118e-04
Loss = 2.2373e-02, PNorm = 168.8188, GNorm = 0.5249, lr_0 = 4.4088e-04
Loss = 3.1480e-02, PNorm = 168.8476, GNorm = 0.2444, lr_0 = 4.4057e-04
Loss = 2.1563e-02, PNorm = 168.8722, GNorm = 0.4816, lr_0 = 4.4027e-04
Loss = 2.7254e-02, PNorm = 168.9019, GNorm = 0.6789, lr_0 = 4.3997e-04
Loss = 2.0597e-02, PNorm = 168.9325, GNorm = 0.2349, lr_0 = 4.3967e-04
Loss = 2.0397e-02, PNorm = 168.9636, GNorm = 0.7416, lr_0 = 4.3937e-04
Validation mae = 0.122191
Epoch 12
Loss = 1.9904e-02, PNorm = 168.9855, GNorm = 0.2011, lr_0 = 4.3907e-04
Loss = 1.8030e-02, PNorm = 169.0077, GNorm = 0.3157, lr_0 = 4.3877e-04
Loss = 2.7644e-02, PNorm = 169.0286, GNorm = 0.3213, lr_0 = 4.3846e-04
Loss = 2.0503e-02, PNorm = 169.0521, GNorm = 0.2306, lr_0 = 4.3816e-04
Loss = 1.6410e-02, PNorm = 169.0747, GNorm = 0.3452, lr_0 = 4.3786e-04
Loss = 1.6957e-02, PNorm = 169.0956, GNorm = 0.5423, lr_0 = 4.3756e-04
Loss = 1.9951e-02, PNorm = 169.1143, GNorm = 0.3005, lr_0 = 4.3726e-04
Loss = 1.8631e-02, PNorm = 169.1332, GNorm = 0.4588, lr_0 = 4.3696e-04
Loss = 1.6559e-02, PNorm = 169.1529, GNorm = 0.3185, lr_0 = 4.3667e-04
Loss = 1.7561e-02, PNorm = 169.1719, GNorm = 0.3712, lr_0 = 4.3637e-04
Loss = 1.9042e-02, PNorm = 169.1931, GNorm = 0.3046, lr_0 = 4.3607e-04
Loss = 1.9532e-02, PNorm = 169.2122, GNorm = 0.3382, lr_0 = 4.3577e-04
Loss = 1.9610e-02, PNorm = 169.2276, GNorm = 0.2218, lr_0 = 4.3547e-04
Loss = 1.9109e-02, PNorm = 169.2486, GNorm = 0.2289, lr_0 = 4.3517e-04
Loss = 2.2650e-02, PNorm = 169.2721, GNorm = 0.2301, lr_0 = 4.3487e-04
Loss = 2.0714e-02, PNorm = 169.2936, GNorm = 0.2207, lr_0 = 4.3458e-04
Loss = 1.8391e-02, PNorm = 169.3141, GNorm = 0.3697, lr_0 = 4.3428e-04
Loss = 1.7767e-02, PNorm = 169.3341, GNorm = 0.1781, lr_0 = 4.3398e-04
Loss = 2.1353e-02, PNorm = 169.3539, GNorm = 0.7667, lr_0 = 4.3368e-04
Loss = 2.0756e-02, PNorm = 169.3680, GNorm = 0.2367, lr_0 = 4.3339e-04
Loss = 2.1248e-02, PNorm = 169.3865, GNorm = 0.2502, lr_0 = 4.3309e-04
Loss = 1.7231e-02, PNorm = 169.4052, GNorm = 0.2445, lr_0 = 4.3279e-04
Loss = 2.0824e-02, PNorm = 169.4270, GNorm = 0.1657, lr_0 = 4.3250e-04
Loss = 1.7831e-02, PNorm = 169.4492, GNorm = 0.5425, lr_0 = 4.3220e-04
Loss = 1.5707e-02, PNorm = 169.4702, GNorm = 0.4908, lr_0 = 4.3190e-04
Loss = 1.5605e-02, PNorm = 169.4896, GNorm = 0.2107, lr_0 = 4.3161e-04
Loss = 1.8507e-02, PNorm = 169.5085, GNorm = 0.5212, lr_0 = 4.3131e-04
Loss = 2.3677e-02, PNorm = 169.5325, GNorm = 0.2953, lr_0 = 4.3102e-04
Loss = 2.0951e-02, PNorm = 169.5572, GNorm = 0.3404, lr_0 = 4.3072e-04
Loss = 1.6211e-02, PNorm = 169.5793, GNorm = 0.3305, lr_0 = 4.3043e-04
Loss = 1.5998e-02, PNorm = 169.5994, GNorm = 0.3631, lr_0 = 4.3013e-04
Loss = 2.1084e-02, PNorm = 169.6180, GNorm = 0.2869, lr_0 = 4.2984e-04
Loss = 1.9683e-02, PNorm = 169.6378, GNorm = 0.5069, lr_0 = 4.2954e-04
Loss = 1.7639e-02, PNorm = 169.6592, GNorm = 0.1837, lr_0 = 4.2925e-04
Loss = 1.6743e-02, PNorm = 169.6796, GNorm = 0.2279, lr_0 = 4.2895e-04
Loss = 1.7913e-02, PNorm = 169.6989, GNorm = 0.4490, lr_0 = 4.2866e-04
Loss = 1.6075e-02, PNorm = 169.7196, GNorm = 0.2454, lr_0 = 4.2837e-04
Loss = 1.7485e-02, PNorm = 169.7397, GNorm = 0.2367, lr_0 = 4.2807e-04
Loss = 1.8850e-02, PNorm = 169.7594, GNorm = 0.2192, lr_0 = 4.2778e-04
Loss = 1.6378e-02, PNorm = 169.7790, GNorm = 0.2131, lr_0 = 4.2749e-04
Loss = 2.1018e-02, PNorm = 169.7978, GNorm = 0.2340, lr_0 = 4.2719e-04
Loss = 1.6332e-02, PNorm = 169.8209, GNorm = 0.3291, lr_0 = 4.2690e-04
Loss = 2.3654e-02, PNorm = 169.8421, GNorm = 0.2096, lr_0 = 4.2661e-04
Loss = 2.0196e-02, PNorm = 169.8653, GNorm = 0.2318, lr_0 = 4.2632e-04
Loss = 1.9688e-02, PNorm = 169.8860, GNorm = 0.2871, lr_0 = 4.2602e-04
Loss = 1.8767e-02, PNorm = 169.9107, GNorm = 0.3247, lr_0 = 4.2573e-04
Loss = 1.5104e-02, PNorm = 169.9319, GNorm = 0.2035, lr_0 = 4.2544e-04
Loss = 2.9763e-02, PNorm = 169.9474, GNorm = 0.2379, lr_0 = 4.2515e-04
Loss = 2.1046e-02, PNorm = 169.9706, GNorm = 0.2310, lr_0 = 4.2486e-04
Loss = 1.7438e-02, PNorm = 169.9968, GNorm = 0.5391, lr_0 = 4.2457e-04
Loss = 3.5088e-02, PNorm = 170.0262, GNorm = 0.2122, lr_0 = 4.2428e-04
Loss = 2.2434e-02, PNorm = 170.0559, GNorm = 0.4191, lr_0 = 4.2399e-04
Loss = 2.0721e-02, PNorm = 170.0847, GNorm = 0.6969, lr_0 = 4.2370e-04
Loss = 1.7880e-02, PNorm = 170.1107, GNorm = 0.3551, lr_0 = 4.2340e-04
Loss = 2.0451e-02, PNorm = 170.1321, GNorm = 0.1727, lr_0 = 4.2311e-04
Loss = 1.8968e-02, PNorm = 170.1549, GNorm = 0.3261, lr_0 = 4.2283e-04
Loss = 2.0109e-02, PNorm = 170.1795, GNorm = 0.3130, lr_0 = 4.2254e-04
Loss = 1.9571e-02, PNorm = 170.2031, GNorm = 0.8562, lr_0 = 4.2225e-04
Loss = 1.8474e-02, PNorm = 170.2311, GNorm = 0.4692, lr_0 = 4.2196e-04
Loss = 1.4839e-02, PNorm = 170.2566, GNorm = 0.1727, lr_0 = 4.2167e-04
Loss = 1.8829e-02, PNorm = 170.2793, GNorm = 0.1910, lr_0 = 4.2138e-04
Loss = 2.0205e-02, PNorm = 170.2984, GNorm = 0.5267, lr_0 = 4.2109e-04
Loss = 1.8585e-02, PNorm = 170.3264, GNorm = 0.4325, lr_0 = 4.2080e-04
Loss = 1.7513e-02, PNorm = 170.3521, GNorm = 0.4221, lr_0 = 4.2051e-04
Loss = 1.5165e-02, PNorm = 170.3776, GNorm = 0.1954, lr_0 = 4.2023e-04
Loss = 1.5943e-02, PNorm = 170.3992, GNorm = 0.4182, lr_0 = 4.1994e-04
Loss = 1.8583e-02, PNorm = 170.4193, GNorm = 0.2032, lr_0 = 4.1965e-04
Loss = 2.0410e-02, PNorm = 170.4407, GNorm = 0.2280, lr_0 = 4.1936e-04
Loss = 2.5291e-02, PNorm = 170.4621, GNorm = 0.2513, lr_0 = 4.1907e-04
Loss = 1.8909e-02, PNorm = 170.4874, GNorm = 0.2787, lr_0 = 4.1879e-04
Loss = 2.1808e-02, PNorm = 170.5108, GNorm = 0.3168, lr_0 = 4.1850e-04
Loss = 1.9865e-02, PNorm = 170.5345, GNorm = 0.2678, lr_0 = 4.1821e-04
Loss = 2.2007e-02, PNorm = 170.5583, GNorm = 0.5310, lr_0 = 4.1793e-04
Loss = 2.6044e-02, PNorm = 170.5824, GNorm = 0.2579, lr_0 = 4.1764e-04
Loss = 2.6885e-02, PNorm = 170.6050, GNorm = 1.2329, lr_0 = 4.1736e-04
Loss = 1.7777e-02, PNorm = 170.6264, GNorm = 0.3456, lr_0 = 4.1707e-04
Loss = 2.5572e-02, PNorm = 170.6566, GNorm = 0.6731, lr_0 = 4.1678e-04
Loss = 1.6186e-02, PNorm = 170.6793, GNorm = 0.2238, lr_0 = 4.1650e-04
Loss = 2.2658e-02, PNorm = 170.7052, GNorm = 0.2775, lr_0 = 4.1621e-04
Loss = 1.7869e-02, PNorm = 170.7287, GNorm = 0.2594, lr_0 = 4.1593e-04
Loss = 2.1073e-02, PNorm = 170.7555, GNorm = 0.6736, lr_0 = 4.1564e-04
Loss = 2.4857e-02, PNorm = 170.7794, GNorm = 0.3841, lr_0 = 4.1536e-04
Loss = 1.5806e-02, PNorm = 170.8044, GNorm = 0.1959, lr_0 = 4.1507e-04
Loss = 2.6192e-02, PNorm = 170.8301, GNorm = 0.2732, lr_0 = 4.1479e-04
Loss = 1.8221e-02, PNorm = 170.8518, GNorm = 0.2979, lr_0 = 4.1450e-04
Loss = 2.2643e-02, PNorm = 170.8753, GNorm = 0.2913, lr_0 = 4.1422e-04
Loss = 2.1173e-02, PNorm = 170.8966, GNorm = 0.3794, lr_0 = 4.1394e-04
Loss = 1.8030e-02, PNorm = 170.9196, GNorm = 0.2521, lr_0 = 4.1365e-04
Loss = 2.3612e-02, PNorm = 170.9420, GNorm = 0.3396, lr_0 = 4.1337e-04
Loss = 1.6448e-02, PNorm = 170.9611, GNorm = 0.3279, lr_0 = 4.1309e-04
Loss = 2.5087e-02, PNorm = 170.9828, GNorm = 0.6435, lr_0 = 4.1280e-04
Loss = 1.6437e-02, PNorm = 171.0080, GNorm = 0.5075, lr_0 = 4.1252e-04
Loss = 2.2050e-02, PNorm = 171.0367, GNorm = 0.3471, lr_0 = 4.1224e-04
Loss = 2.7031e-02, PNorm = 171.0608, GNorm = 0.2450, lr_0 = 4.1196e-04
Loss = 2.7087e-02, PNorm = 171.0868, GNorm = 0.9751, lr_0 = 4.1167e-04
Loss = 2.6587e-02, PNorm = 171.1152, GNorm = 0.2946, lr_0 = 4.1139e-04
Loss = 1.4919e-02, PNorm = 171.1427, GNorm = 0.4169, lr_0 = 4.1111e-04
Loss = 1.7724e-02, PNorm = 171.1713, GNorm = 0.6205, lr_0 = 4.1083e-04
Loss = 1.8692e-02, PNorm = 171.2002, GNorm = 0.1515, lr_0 = 4.1055e-04
Loss = 2.6794e-02, PNorm = 171.2250, GNorm = 0.3451, lr_0 = 4.1027e-04
Loss = 1.9427e-02, PNorm = 171.2510, GNorm = 0.1878, lr_0 = 4.0998e-04
Loss = 1.8425e-02, PNorm = 171.2784, GNorm = 0.3432, lr_0 = 4.0970e-04
Loss = 1.5400e-02, PNorm = 171.3055, GNorm = 0.5210, lr_0 = 4.0942e-04
Loss = 1.9275e-02, PNorm = 171.3301, GNorm = 0.4191, lr_0 = 4.0914e-04
Loss = 2.0703e-02, PNorm = 171.3505, GNorm = 0.2648, lr_0 = 4.0886e-04
Loss = 1.9858e-02, PNorm = 171.3725, GNorm = 0.4510, lr_0 = 4.0858e-04
Loss = 2.3359e-02, PNorm = 171.3980, GNorm = 0.9096, lr_0 = 4.0830e-04
Loss = 1.8251e-02, PNorm = 171.4213, GNorm = 0.2902, lr_0 = 4.0802e-04
Loss = 2.1776e-02, PNorm = 171.4503, GNorm = 0.7489, lr_0 = 4.0774e-04
Loss = 1.7582e-02, PNorm = 171.4773, GNorm = 0.4082, lr_0 = 4.0746e-04
Loss = 2.3088e-02, PNorm = 171.5038, GNorm = 0.2591, lr_0 = 4.0718e-04
Loss = 2.3283e-02, PNorm = 171.5341, GNorm = 0.3403, lr_0 = 4.0691e-04
Loss = 2.0169e-02, PNorm = 171.5604, GNorm = 0.3856, lr_0 = 4.0663e-04
Loss = 1.4273e-02, PNorm = 171.5834, GNorm = 0.1770, lr_0 = 4.0635e-04
Loss = 1.8889e-02, PNorm = 171.6056, GNorm = 0.6163, lr_0 = 4.0607e-04
Loss = 2.2283e-02, PNorm = 171.6262, GNorm = 0.3455, lr_0 = 4.0579e-04
Loss = 2.5061e-02, PNorm = 171.6519, GNorm = 0.4276, lr_0 = 4.0551e-04
Loss = 2.0904e-02, PNorm = 171.6837, GNorm = 0.4418, lr_0 = 4.0524e-04
Loss = 1.7912e-02, PNorm = 171.7112, GNorm = 0.2938, lr_0 = 4.0496e-04
Loss = 1.7787e-02, PNorm = 171.7365, GNorm = 0.3073, lr_0 = 4.0468e-04
Validation mae = 0.121615
Epoch 13
Loss = 2.1341e-02, PNorm = 171.7592, GNorm = 0.1953, lr_0 = 4.0440e-04
Loss = 1.8026e-02, PNorm = 171.7791, GNorm = 0.3303, lr_0 = 4.0413e-04
Loss = 1.6426e-02, PNorm = 171.7951, GNorm = 0.5363, lr_0 = 4.0385e-04
Loss = 1.4717e-02, PNorm = 171.8146, GNorm = 0.1788, lr_0 = 4.0357e-04
Loss = 1.7548e-02, PNorm = 171.8308, GNorm = 0.1834, lr_0 = 4.0330e-04
Loss = 1.4620e-02, PNorm = 171.8482, GNorm = 0.2975, lr_0 = 4.0302e-04
Loss = 1.8007e-02, PNorm = 171.8629, GNorm = 0.3654, lr_0 = 4.0274e-04
Loss = 1.7259e-02, PNorm = 171.8815, GNorm = 0.4996, lr_0 = 4.0247e-04
Loss = 1.5748e-02, PNorm = 171.8986, GNorm = 0.2910, lr_0 = 4.0219e-04
Loss = 2.1386e-02, PNorm = 171.9154, GNorm = 0.3663, lr_0 = 4.0192e-04
Loss = 1.4732e-02, PNorm = 171.9328, GNorm = 0.1666, lr_0 = 4.0164e-04
Loss = 1.8618e-02, PNorm = 171.9516, GNorm = 0.4407, lr_0 = 4.0137e-04
Loss = 2.4054e-02, PNorm = 171.9675, GNorm = 0.2922, lr_0 = 4.0109e-04
Loss = 1.9026e-02, PNorm = 171.9816, GNorm = 0.3651, lr_0 = 4.0082e-04
Loss = 1.5993e-02, PNorm = 171.9958, GNorm = 0.1954, lr_0 = 4.0054e-04
Loss = 1.6330e-02, PNorm = 172.0113, GNorm = 0.1851, lr_0 = 4.0027e-04
Loss = 1.7652e-02, PNorm = 172.0247, GNorm = 0.3073, lr_0 = 3.9999e-04
Loss = 1.4786e-02, PNorm = 172.0406, GNorm = 0.3578, lr_0 = 3.9972e-04
Loss = 1.7602e-02, PNorm = 172.0593, GNorm = 0.2060, lr_0 = 3.9945e-04
Loss = 1.7587e-02, PNorm = 172.0783, GNorm = 0.3534, lr_0 = 3.9917e-04
Loss = 2.0320e-02, PNorm = 172.1001, GNorm = 0.4329, lr_0 = 3.9890e-04
Loss = 1.3875e-02, PNorm = 172.1170, GNorm = 0.2854, lr_0 = 3.9863e-04
Loss = 3.2392e-02, PNorm = 172.1375, GNorm = 0.2117, lr_0 = 3.9835e-04
Loss = 1.9199e-02, PNorm = 172.1582, GNorm = 0.4021, lr_0 = 3.9808e-04
Loss = 1.6260e-02, PNorm = 172.1743, GNorm = 0.4325, lr_0 = 3.9781e-04
Loss = 1.5580e-02, PNorm = 172.1871, GNorm = 0.1999, lr_0 = 3.9753e-04
Loss = 1.6044e-02, PNorm = 172.2031, GNorm = 0.2023, lr_0 = 3.9726e-04
Loss = 1.6248e-02, PNorm = 172.2194, GNorm = 0.1825, lr_0 = 3.9699e-04
Loss = 1.6060e-02, PNorm = 172.2395, GNorm = 0.2690, lr_0 = 3.9672e-04
Loss = 1.9843e-02, PNorm = 172.2636, GNorm = 0.2567, lr_0 = 3.9645e-04
Loss = 1.8242e-02, PNorm = 172.2819, GNorm = 0.2618, lr_0 = 3.9617e-04
Loss = 2.0451e-02, PNorm = 172.3012, GNorm = 0.4745, lr_0 = 3.9590e-04
Loss = 1.6976e-02, PNorm = 172.3190, GNorm = 0.5597, lr_0 = 3.9563e-04
Loss = 1.5429e-02, PNorm = 172.3361, GNorm = 0.3511, lr_0 = 3.9536e-04
Loss = 1.3629e-02, PNorm = 172.3513, GNorm = 0.3065, lr_0 = 3.9509e-04
Loss = 2.3809e-02, PNorm = 172.3723, GNorm = 0.2753, lr_0 = 3.9482e-04
Loss = 1.6117e-02, PNorm = 172.3926, GNorm = 0.3531, lr_0 = 3.9455e-04
Loss = 1.5619e-02, PNorm = 172.4133, GNorm = 0.3335, lr_0 = 3.9428e-04
Loss = 1.6311e-02, PNorm = 172.4319, GNorm = 0.2979, lr_0 = 3.9401e-04
Loss = 1.5276e-02, PNorm = 172.4496, GNorm = 0.3559, lr_0 = 3.9374e-04
Loss = 1.4138e-02, PNorm = 172.4674, GNorm = 0.3605, lr_0 = 3.9347e-04
Loss = 2.0014e-02, PNorm = 172.4835, GNorm = 0.2469, lr_0 = 3.9320e-04
Loss = 1.5303e-02, PNorm = 172.4989, GNorm = 0.2384, lr_0 = 3.9293e-04
Loss = 1.7308e-02, PNorm = 172.5160, GNorm = 0.1822, lr_0 = 3.9266e-04
Loss = 1.3029e-02, PNorm = 172.5341, GNorm = 0.4172, lr_0 = 3.9239e-04
Loss = 1.5631e-02, PNorm = 172.5505, GNorm = 0.6134, lr_0 = 3.9212e-04
Loss = 1.3067e-02, PNorm = 172.5688, GNorm = 0.3098, lr_0 = 3.9185e-04
Loss = 1.9560e-02, PNorm = 172.5876, GNorm = 0.2067, lr_0 = 3.9159e-04
Loss = 1.6686e-02, PNorm = 172.6042, GNorm = 0.4422, lr_0 = 3.9132e-04
Loss = 1.9762e-02, PNorm = 172.6233, GNorm = 0.2639, lr_0 = 3.9105e-04
Loss = 1.3623e-02, PNorm = 172.6421, GNorm = 0.1748, lr_0 = 3.9078e-04
Loss = 1.7209e-02, PNorm = 172.6596, GNorm = 0.5051, lr_0 = 3.9051e-04
Loss = 1.3116e-02, PNorm = 172.6769, GNorm = 0.2467, lr_0 = 3.9025e-04
Loss = 1.4099e-02, PNorm = 172.6975, GNorm = 0.3795, lr_0 = 3.8998e-04
Loss = 1.8316e-02, PNorm = 172.7154, GNorm = 0.4589, lr_0 = 3.8971e-04
Loss = 1.4933e-02, PNorm = 172.7323, GNorm = 0.7260, lr_0 = 3.8945e-04
Loss = 1.2968e-02, PNorm = 172.7505, GNorm = 0.3840, lr_0 = 3.8918e-04
Loss = 1.4570e-02, PNorm = 172.7670, GNorm = 0.2880, lr_0 = 3.8891e-04
Loss = 1.2430e-02, PNorm = 172.7832, GNorm = 0.1745, lr_0 = 3.8865e-04
Loss = 1.6579e-02, PNorm = 172.7988, GNorm = 0.6565, lr_0 = 3.8838e-04
Loss = 2.3218e-02, PNorm = 172.8159, GNorm = 0.1436, lr_0 = 3.8811e-04
Loss = 1.4931e-02, PNorm = 172.8290, GNorm = 0.2686, lr_0 = 3.8785e-04
Loss = 1.4666e-02, PNorm = 172.8479, GNorm = 0.2212, lr_0 = 3.8758e-04
Loss = 1.8403e-02, PNorm = 172.8670, GNorm = 0.2115, lr_0 = 3.8732e-04
Loss = 1.8140e-02, PNorm = 172.8919, GNorm = 0.5757, lr_0 = 3.8705e-04
Loss = 1.6788e-02, PNorm = 172.9153, GNorm = 0.3253, lr_0 = 3.8679e-04
Loss = 1.6665e-02, PNorm = 172.9355, GNorm = 0.7795, lr_0 = 3.8652e-04
Loss = 1.5520e-02, PNorm = 172.9554, GNorm = 0.2909, lr_0 = 3.8626e-04
Loss = 1.6886e-02, PNorm = 172.9717, GNorm = 0.5728, lr_0 = 3.8599e-04
Loss = 1.7709e-02, PNorm = 172.9912, GNorm = 0.3192, lr_0 = 3.8573e-04
Loss = 2.1149e-02, PNorm = 173.0074, GNorm = 0.1983, lr_0 = 3.8546e-04
Loss = 1.3243e-02, PNorm = 173.0272, GNorm = 0.4227, lr_0 = 3.8520e-04
Loss = 1.5274e-02, PNorm = 173.0457, GNorm = 0.4572, lr_0 = 3.8493e-04
Loss = 1.5078e-02, PNorm = 173.0633, GNorm = 0.4894, lr_0 = 3.8467e-04
Loss = 2.9637e-02, PNorm = 173.0888, GNorm = 0.5161, lr_0 = 3.8441e-04
Loss = 1.7197e-02, PNorm = 173.1101, GNorm = 0.1799, lr_0 = 3.8414e-04
Loss = 1.8232e-02, PNorm = 173.1315, GNorm = 0.1505, lr_0 = 3.8388e-04
Loss = 1.4692e-02, PNorm = 173.1543, GNorm = 0.3106, lr_0 = 3.8362e-04
Loss = 2.0930e-02, PNorm = 173.1733, GNorm = 0.5735, lr_0 = 3.8336e-04
Loss = 1.5242e-02, PNorm = 173.1956, GNorm = 0.3536, lr_0 = 3.8309e-04
Loss = 1.5297e-02, PNorm = 173.2158, GNorm = 0.4284, lr_0 = 3.8283e-04
Loss = 1.8602e-02, PNorm = 173.2326, GNorm = 0.4061, lr_0 = 3.8257e-04
Loss = 1.2795e-02, PNorm = 173.2478, GNorm = 0.2146, lr_0 = 3.8231e-04
Loss = 1.5654e-02, PNorm = 173.2655, GNorm = 0.4635, lr_0 = 3.8204e-04
Loss = 2.0292e-02, PNorm = 173.2794, GNorm = 0.2639, lr_0 = 3.8178e-04
Loss = 1.5157e-02, PNorm = 173.2976, GNorm = 0.3861, lr_0 = 3.8152e-04
Loss = 1.8412e-02, PNorm = 173.3212, GNorm = 0.4299, lr_0 = 3.8126e-04
Loss = 1.7936e-02, PNorm = 173.3436, GNorm = 0.1872, lr_0 = 3.8100e-04
Loss = 1.5460e-02, PNorm = 173.3677, GNorm = 0.3594, lr_0 = 3.8074e-04
Loss = 1.3843e-02, PNorm = 173.3890, GNorm = 0.2592, lr_0 = 3.8048e-04
Loss = 1.4929e-02, PNorm = 173.4099, GNorm = 0.4327, lr_0 = 3.8022e-04
Loss = 1.3495e-02, PNorm = 173.4254, GNorm = 0.3920, lr_0 = 3.7995e-04
Loss = 1.6180e-02, PNorm = 173.4457, GNorm = 0.3346, lr_0 = 3.7969e-04
Loss = 2.2983e-02, PNorm = 173.4610, GNorm = 0.3059, lr_0 = 3.7943e-04
Loss = 1.9062e-02, PNorm = 173.4754, GNorm = 0.4035, lr_0 = 3.7917e-04
Loss = 1.5620e-02, PNorm = 173.4943, GNorm = 0.4446, lr_0 = 3.7891e-04
Loss = 1.8516e-02, PNorm = 173.5130, GNorm = 0.1672, lr_0 = 3.7866e-04
Loss = 1.9123e-02, PNorm = 173.5337, GNorm = 0.1883, lr_0 = 3.7840e-04
Loss = 1.3106e-02, PNorm = 173.5530, GNorm = 0.2434, lr_0 = 3.7814e-04
Loss = 1.4564e-02, PNorm = 173.5710, GNorm = 0.2315, lr_0 = 3.7788e-04
Loss = 2.2588e-02, PNorm = 173.5896, GNorm = 0.3255, lr_0 = 3.7762e-04
Loss = 1.1859e-02, PNorm = 173.6099, GNorm = 0.1832, lr_0 = 3.7736e-04
Loss = 2.3969e-02, PNorm = 173.6271, GNorm = 0.5859, lr_0 = 3.7710e-04
Loss = 1.8364e-02, PNorm = 173.6500, GNorm = 0.2561, lr_0 = 3.7684e-04
Loss = 1.9562e-02, PNorm = 173.6697, GNorm = 0.6838, lr_0 = 3.7659e-04
Loss = 2.4444e-02, PNorm = 173.6944, GNorm = 0.2496, lr_0 = 3.7633e-04
Loss = 2.4212e-02, PNorm = 173.7130, GNorm = 0.2611, lr_0 = 3.7607e-04
Loss = 1.7429e-02, PNorm = 173.7357, GNorm = 0.1866, lr_0 = 3.7581e-04
Loss = 1.4883e-02, PNorm = 173.7551, GNorm = 0.4301, lr_0 = 3.7555e-04
Loss = 1.7899e-02, PNorm = 173.7756, GNorm = 0.3146, lr_0 = 3.7530e-04
Loss = 1.3797e-02, PNorm = 173.7955, GNorm = 0.4228, lr_0 = 3.7504e-04
Loss = 1.7601e-02, PNorm = 173.8143, GNorm = 0.3308, lr_0 = 3.7478e-04
Loss = 1.8148e-02, PNorm = 173.8343, GNorm = 0.3306, lr_0 = 3.7453e-04
Loss = 1.7306e-02, PNorm = 173.8569, GNorm = 0.2516, lr_0 = 3.7427e-04
Loss = 1.4705e-02, PNorm = 173.8770, GNorm = 0.2817, lr_0 = 3.7401e-04
Loss = 1.6150e-02, PNorm = 173.8962, GNorm = 0.2415, lr_0 = 3.7376e-04
Loss = 1.3557e-02, PNorm = 173.9107, GNorm = 0.2611, lr_0 = 3.7350e-04
Loss = 1.2734e-02, PNorm = 173.9271, GNorm = 0.2663, lr_0 = 3.7325e-04
Loss = 2.1676e-02, PNorm = 173.9478, GNorm = 0.2705, lr_0 = 3.7299e-04
Loss = 1.5614e-02, PNorm = 173.9694, GNorm = 0.3386, lr_0 = 3.7273e-04
Validation mae = 0.121707
Epoch 14
Loss = 1.2003e-02, PNorm = 173.9873, GNorm = 0.4454, lr_0 = 3.7248e-04
Loss = 1.8067e-02, PNorm = 174.0011, GNorm = 0.4162, lr_0 = 3.7222e-04
Loss = 1.7899e-02, PNorm = 174.0133, GNorm = 0.3978, lr_0 = 3.7197e-04
Loss = 1.1350e-02, PNorm = 174.0261, GNorm = 0.2202, lr_0 = 3.7171e-04
Loss = 1.1841e-02, PNorm = 174.0387, GNorm = 0.6129, lr_0 = 3.7146e-04
Loss = 1.4496e-02, PNorm = 174.0535, GNorm = 0.3936, lr_0 = 3.7120e-04
Loss = 1.2044e-02, PNorm = 174.0656, GNorm = 0.2992, lr_0 = 3.7095e-04
Loss = 1.8315e-02, PNorm = 174.0794, GNorm = 0.2586, lr_0 = 3.7070e-04
Loss = 2.0738e-02, PNorm = 174.0986, GNorm = 0.4141, lr_0 = 3.7044e-04
Loss = 1.3259e-02, PNorm = 174.1174, GNorm = 0.6551, lr_0 = 3.7019e-04
Loss = 1.1464e-02, PNorm = 174.1313, GNorm = 0.2784, lr_0 = 3.6993e-04
Loss = 1.2049e-02, PNorm = 174.1449, GNorm = 0.1651, lr_0 = 3.6968e-04
Loss = 1.1643e-02, PNorm = 174.1547, GNorm = 0.2109, lr_0 = 3.6943e-04
Loss = 1.1751e-02, PNorm = 174.1621, GNorm = 0.1189, lr_0 = 3.6917e-04
Loss = 1.3563e-02, PNorm = 174.1751, GNorm = 0.4183, lr_0 = 3.6892e-04
Loss = 1.3731e-02, PNorm = 174.1857, GNorm = 0.3765, lr_0 = 3.6867e-04
Loss = 1.1115e-02, PNorm = 174.2021, GNorm = 0.4192, lr_0 = 3.6842e-04
Loss = 1.4909e-02, PNorm = 174.2215, GNorm = 0.2667, lr_0 = 3.6816e-04
Loss = 1.2448e-02, PNorm = 174.2336, GNorm = 0.4829, lr_0 = 3.6791e-04
Loss = 1.1623e-02, PNorm = 174.2448, GNorm = 0.2571, lr_0 = 3.6766e-04
Loss = 1.4101e-02, PNorm = 174.2540, GNorm = 0.3096, lr_0 = 3.6741e-04
Loss = 2.8827e-02, PNorm = 174.2694, GNorm = 2.4587, lr_0 = 3.6716e-04
Loss = 1.3546e-02, PNorm = 174.2864, GNorm = 0.3859, lr_0 = 3.6690e-04
Loss = 1.2068e-02, PNorm = 174.3011, GNorm = 0.2743, lr_0 = 3.6665e-04
Loss = 1.5659e-02, PNorm = 174.3147, GNorm = 0.1659, lr_0 = 3.6640e-04
Loss = 1.8080e-02, PNorm = 174.3294, GNorm = 0.2313, lr_0 = 3.6615e-04
Loss = 1.6220e-02, PNorm = 174.3428, GNorm = 0.2319, lr_0 = 3.6590e-04
Loss = 1.1215e-02, PNorm = 174.3589, GNorm = 0.1714, lr_0 = 3.6565e-04
Loss = 1.4051e-02, PNorm = 174.3744, GNorm = 0.2520, lr_0 = 3.6540e-04
Loss = 1.3951e-02, PNorm = 174.3864, GNorm = 0.1767, lr_0 = 3.6515e-04
Loss = 1.3707e-02, PNorm = 174.3997, GNorm = 0.2333, lr_0 = 3.6490e-04
Loss = 1.4992e-02, PNorm = 174.4100, GNorm = 0.1313, lr_0 = 3.6465e-04
Loss = 1.2002e-02, PNorm = 174.4211, GNorm = 0.1443, lr_0 = 3.6440e-04
Loss = 1.8744e-02, PNorm = 174.4371, GNorm = 0.2746, lr_0 = 3.6415e-04
Loss = 1.2100e-02, PNorm = 174.4517, GNorm = 0.2033, lr_0 = 3.6390e-04
Loss = 9.9649e-03, PNorm = 174.4672, GNorm = 0.1474, lr_0 = 3.6365e-04
Loss = 1.2528e-02, PNorm = 174.4800, GNorm = 0.2664, lr_0 = 3.6340e-04
Loss = 1.7531e-02, PNorm = 174.4966, GNorm = 0.3484, lr_0 = 3.6315e-04
Loss = 1.1513e-02, PNorm = 174.5092, GNorm = 0.1959, lr_0 = 3.6290e-04
Loss = 1.2179e-02, PNorm = 174.5228, GNorm = 0.2029, lr_0 = 3.6266e-04
Loss = 1.0935e-02, PNorm = 174.5390, GNorm = 0.4133, lr_0 = 3.6241e-04
Loss = 1.0905e-02, PNorm = 174.5541, GNorm = 0.3513, lr_0 = 3.6216e-04
Loss = 1.2225e-02, PNorm = 174.5690, GNorm = 0.1239, lr_0 = 3.6191e-04
Loss = 1.2864e-02, PNorm = 174.5815, GNorm = 0.2268, lr_0 = 3.6166e-04
Loss = 1.1410e-02, PNorm = 174.5960, GNorm = 0.1569, lr_0 = 3.6141e-04
Loss = 1.0423e-02, PNorm = 174.6129, GNorm = 0.1509, lr_0 = 3.6117e-04
Loss = 1.0404e-02, PNorm = 174.6268, GNorm = 0.3203, lr_0 = 3.6092e-04
Loss = 1.7244e-02, PNorm = 174.6411, GNorm = 0.5584, lr_0 = 3.6067e-04
Loss = 1.1562e-02, PNorm = 174.6509, GNorm = 0.4784, lr_0 = 3.6043e-04
Loss = 1.2315e-02, PNorm = 174.6613, GNorm = 0.2736, lr_0 = 3.6018e-04
Loss = 1.0195e-02, PNorm = 174.6714, GNorm = 0.2844, lr_0 = 3.5993e-04
Loss = 1.4214e-02, PNorm = 174.6867, GNorm = 0.1100, lr_0 = 3.5969e-04
Loss = 2.1035e-02, PNorm = 174.7025, GNorm = 0.1618, lr_0 = 3.5944e-04
Loss = 1.5351e-02, PNorm = 174.7170, GNorm = 0.4619, lr_0 = 3.5919e-04
Loss = 1.6663e-02, PNorm = 174.7336, GNorm = 0.6875, lr_0 = 3.5895e-04
Loss = 1.4666e-02, PNorm = 174.7522, GNorm = 0.2743, lr_0 = 3.5870e-04
Loss = 1.2364e-02, PNorm = 174.7689, GNorm = 0.3838, lr_0 = 3.5845e-04
Loss = 1.3728e-02, PNorm = 174.7863, GNorm = 0.1549, lr_0 = 3.5821e-04
Loss = 1.2645e-02, PNorm = 174.8022, GNorm = 0.1460, lr_0 = 3.5796e-04
Loss = 1.1516e-02, PNorm = 174.8140, GNorm = 0.1975, lr_0 = 3.5772e-04
Loss = 1.4809e-02, PNorm = 174.8284, GNorm = 0.3510, lr_0 = 3.5747e-04
Loss = 1.5597e-02, PNorm = 174.8432, GNorm = 0.1898, lr_0 = 3.5723e-04
Loss = 1.7020e-02, PNorm = 174.8574, GNorm = 0.7056, lr_0 = 3.5698e-04
Loss = 9.6649e-03, PNorm = 174.8714, GNorm = 0.1403, lr_0 = 3.5674e-04
Loss = 1.5586e-02, PNorm = 174.8861, GNorm = 1.5384, lr_0 = 3.5650e-04
Loss = 1.3627e-02, PNorm = 174.8995, GNorm = 0.2591, lr_0 = 3.5625e-04
Loss = 1.2155e-02, PNorm = 174.9164, GNorm = 0.3261, lr_0 = 3.5601e-04
Loss = 1.7091e-02, PNorm = 174.9349, GNorm = 0.1500, lr_0 = 3.5576e-04
Loss = 1.3543e-02, PNorm = 174.9502, GNorm = 0.1417, lr_0 = 3.5552e-04
Loss = 1.4225e-02, PNorm = 174.9657, GNorm = 0.4791, lr_0 = 3.5528e-04
Loss = 1.3479e-02, PNorm = 174.9809, GNorm = 0.2612, lr_0 = 3.5503e-04
Loss = 1.2137e-02, PNorm = 174.9941, GNorm = 0.1723, lr_0 = 3.5479e-04
Loss = 1.2363e-02, PNorm = 175.0093, GNorm = 0.2174, lr_0 = 3.5455e-04
Loss = 1.4108e-02, PNorm = 175.0231, GNorm = 0.1810, lr_0 = 3.5430e-04
Loss = 1.1420e-02, PNorm = 175.0373, GNorm = 0.3669, lr_0 = 3.5406e-04
Loss = 1.1645e-02, PNorm = 175.0531, GNorm = 0.1771, lr_0 = 3.5382e-04
Loss = 1.4674e-02, PNorm = 175.0679, GNorm = 0.2928, lr_0 = 3.5358e-04
Loss = 1.3076e-02, PNorm = 175.0832, GNorm = 0.3385, lr_0 = 3.5333e-04
Loss = 1.3087e-02, PNorm = 175.0984, GNorm = 0.1975, lr_0 = 3.5309e-04
Loss = 1.4027e-02, PNorm = 175.1120, GNorm = 0.1755, lr_0 = 3.5285e-04
Loss = 1.4939e-02, PNorm = 175.1298, GNorm = 0.1831, lr_0 = 3.5261e-04
Loss = 2.0049e-02, PNorm = 175.1457, GNorm = 0.2052, lr_0 = 3.5237e-04
Loss = 1.3039e-02, PNorm = 175.1624, GNorm = 0.1406, lr_0 = 3.5212e-04
Loss = 1.3301e-02, PNorm = 175.1797, GNorm = 0.3966, lr_0 = 3.5188e-04
Loss = 1.6725e-02, PNorm = 175.1956, GNorm = 0.3241, lr_0 = 3.5164e-04
Loss = 1.1033e-02, PNorm = 175.2122, GNorm = 0.3207, lr_0 = 3.5140e-04
Loss = 1.7043e-02, PNorm = 175.2266, GNorm = 0.2345, lr_0 = 3.5116e-04
Loss = 1.0303e-02, PNorm = 175.2409, GNorm = 0.2121, lr_0 = 3.5092e-04
Loss = 1.0452e-02, PNorm = 175.2575, GNorm = 0.2803, lr_0 = 3.5068e-04
Loss = 1.8480e-02, PNorm = 175.2739, GNorm = 0.1959, lr_0 = 3.5044e-04
Loss = 1.5811e-02, PNorm = 175.2910, GNorm = 0.9181, lr_0 = 3.5020e-04
Loss = 1.5195e-02, PNorm = 175.3083, GNorm = 0.3611, lr_0 = 3.4996e-04
Loss = 1.3671e-02, PNorm = 175.3248, GNorm = 0.1644, lr_0 = 3.4972e-04
Loss = 1.2883e-02, PNorm = 175.3402, GNorm = 0.1221, lr_0 = 3.4948e-04
Loss = 1.1224e-02, PNorm = 175.3553, GNorm = 0.2823, lr_0 = 3.4924e-04
Loss = 1.4089e-02, PNorm = 175.3715, GNorm = 0.3010, lr_0 = 3.4900e-04
Loss = 1.2179e-02, PNorm = 175.3885, GNorm = 0.1525, lr_0 = 3.4876e-04
Loss = 1.5024e-02, PNorm = 175.4071, GNorm = 0.2872, lr_0 = 3.4852e-04
Loss = 1.4174e-02, PNorm = 175.4235, GNorm = 0.2273, lr_0 = 3.4828e-04
Loss = 1.1074e-02, PNorm = 175.4346, GNorm = 0.5318, lr_0 = 3.4805e-04
Loss = 1.7514e-02, PNorm = 175.4494, GNorm = 0.3701, lr_0 = 3.4781e-04
Loss = 1.4035e-02, PNorm = 175.4638, GNorm = 0.2475, lr_0 = 3.4757e-04
Loss = 1.3654e-02, PNorm = 175.4790, GNorm = 0.2579, lr_0 = 3.4733e-04
Loss = 1.8855e-02, PNorm = 175.4994, GNorm = 0.4210, lr_0 = 3.4709e-04
Loss = 1.1436e-02, PNorm = 175.5182, GNorm = 0.1683, lr_0 = 3.4686e-04
Loss = 1.2252e-02, PNorm = 175.5327, GNorm = 0.1313, lr_0 = 3.4662e-04
Loss = 1.5881e-02, PNorm = 175.5466, GNorm = 0.3231, lr_0 = 3.4638e-04
Loss = 1.8069e-02, PNorm = 175.5640, GNorm = 0.1507, lr_0 = 3.4614e-04
Loss = 1.2740e-02, PNorm = 175.5817, GNorm = 0.2959, lr_0 = 3.4591e-04
Loss = 1.7505e-02, PNorm = 175.5985, GNorm = 0.4104, lr_0 = 3.4567e-04
Loss = 1.1938e-02, PNorm = 175.6119, GNorm = 0.2641, lr_0 = 3.4543e-04
Loss = 1.0121e-02, PNorm = 175.6275, GNorm = 0.2107, lr_0 = 3.4520e-04
Loss = 1.3005e-02, PNorm = 175.6434, GNorm = 0.2567, lr_0 = 3.4496e-04
Loss = 1.6909e-02, PNorm = 175.6559, GNorm = 0.2661, lr_0 = 3.4472e-04
Loss = 1.5783e-02, PNorm = 175.6695, GNorm = 0.2549, lr_0 = 3.4449e-04
Loss = 2.2429e-02, PNorm = 175.6849, GNorm = 0.3605, lr_0 = 3.4425e-04
Loss = 1.3496e-02, PNorm = 175.6990, GNorm = 0.1577, lr_0 = 3.4402e-04
Loss = 1.6851e-02, PNorm = 175.7171, GNorm = 0.3095, lr_0 = 3.4378e-04
Loss = 1.4469e-02, PNorm = 175.7347, GNorm = 0.2839, lr_0 = 3.4354e-04
Loss = 1.2008e-02, PNorm = 175.7521, GNorm = 0.3977, lr_0 = 3.4331e-04
Validation mae = 0.121502
Epoch 15
Loss = 1.1442e-02, PNorm = 175.7641, GNorm = 0.3936, lr_0 = 3.4307e-04
Loss = 1.3706e-02, PNorm = 175.7750, GNorm = 0.4162, lr_0 = 3.4284e-04
Loss = 9.7379e-03, PNorm = 175.7871, GNorm = 0.2694, lr_0 = 3.4260e-04
Loss = 1.3346e-02, PNorm = 175.7999, GNorm = 0.2108, lr_0 = 3.4237e-04
Loss = 1.2008e-02, PNorm = 175.8136, GNorm = 0.2954, lr_0 = 3.4213e-04
Loss = 1.1863e-02, PNorm = 175.8262, GNorm = 0.4955, lr_0 = 3.4190e-04
Loss = 1.1047e-02, PNorm = 175.8381, GNorm = 0.3497, lr_0 = 3.4167e-04
Loss = 1.4919e-02, PNorm = 175.8478, GNorm = 0.2509, lr_0 = 3.4143e-04
Loss = 1.2568e-02, PNorm = 175.8605, GNorm = 0.4583, lr_0 = 3.4120e-04
Loss = 9.0498e-03, PNorm = 175.8745, GNorm = 0.2688, lr_0 = 3.4096e-04
Loss = 1.0692e-02, PNorm = 175.8859, GNorm = 0.3125, lr_0 = 3.4073e-04
Loss = 1.2320e-02, PNorm = 175.8971, GNorm = 0.5703, lr_0 = 3.4050e-04
Loss = 1.0361e-02, PNorm = 175.9086, GNorm = 0.3254, lr_0 = 3.4026e-04
Loss = 1.4009e-02, PNorm = 175.9223, GNorm = 0.9017, lr_0 = 3.4003e-04
Loss = 1.2261e-02, PNorm = 175.9339, GNorm = 0.2176, lr_0 = 3.3980e-04
Loss = 1.4060e-02, PNorm = 175.9449, GNorm = 0.5579, lr_0 = 3.3956e-04
Loss = 9.7076e-03, PNorm = 175.9583, GNorm = 0.1263, lr_0 = 3.3933e-04
Loss = 1.3008e-02, PNorm = 175.9695, GNorm = 0.1482, lr_0 = 3.3910e-04
Loss = 1.2133e-02, PNorm = 175.9810, GNorm = 0.4812, lr_0 = 3.3887e-04
Loss = 1.2764e-02, PNorm = 175.9930, GNorm = 0.5150, lr_0 = 3.3864e-04
Loss = 1.1959e-02, PNorm = 176.0059, GNorm = 0.4414, lr_0 = 3.3840e-04
Loss = 1.3056e-02, PNorm = 176.0181, GNorm = 0.2468, lr_0 = 3.3817e-04
Loss = 1.6446e-02, PNorm = 176.0315, GNorm = 0.1866, lr_0 = 3.3794e-04
Loss = 1.1013e-02, PNorm = 176.0453, GNorm = 0.2375, lr_0 = 3.3771e-04
Loss = 1.3130e-02, PNorm = 176.0590, GNorm = 0.2772, lr_0 = 3.3748e-04
Loss = 1.8389e-02, PNorm = 176.0723, GNorm = 0.5163, lr_0 = 3.3725e-04
Loss = 1.7259e-02, PNorm = 176.0815, GNorm = 0.1384, lr_0 = 3.3701e-04
Loss = 1.3688e-02, PNorm = 176.0928, GNorm = 0.2415, lr_0 = 3.3678e-04
Loss = 1.2724e-02, PNorm = 176.1073, GNorm = 0.2306, lr_0 = 3.3655e-04
Loss = 9.9731e-03, PNorm = 176.1196, GNorm = 0.4353, lr_0 = 3.3632e-04
Loss = 1.2187e-02, PNorm = 176.1311, GNorm = 0.2095, lr_0 = 3.3609e-04
Loss = 1.3038e-02, PNorm = 176.1423, GNorm = 0.6145, lr_0 = 3.3586e-04
Loss = 1.2921e-02, PNorm = 176.1536, GNorm = 0.5491, lr_0 = 3.3563e-04
Loss = 1.5671e-02, PNorm = 176.1663, GNorm = 0.2622, lr_0 = 3.3540e-04
Loss = 1.6769e-02, PNorm = 176.1786, GNorm = 0.2142, lr_0 = 3.3517e-04
Loss = 1.1247e-02, PNorm = 176.1915, GNorm = 0.2809, lr_0 = 3.3494e-04
Loss = 1.6474e-02, PNorm = 176.2020, GNorm = 0.2273, lr_0 = 3.3471e-04
Loss = 1.0931e-02, PNorm = 176.2164, GNorm = 0.3497, lr_0 = 3.3448e-04
Loss = 1.1093e-02, PNorm = 176.2287, GNorm = 0.3529, lr_0 = 3.3425e-04
Loss = 1.4556e-02, PNorm = 176.2396, GNorm = 0.5476, lr_0 = 3.3403e-04
Loss = 9.9883e-03, PNorm = 176.2541, GNorm = 0.5037, lr_0 = 3.3380e-04
Loss = 1.2181e-02, PNorm = 176.2633, GNorm = 0.6770, lr_0 = 3.3357e-04
Loss = 1.2079e-02, PNorm = 176.2751, GNorm = 0.1620, lr_0 = 3.3334e-04
Loss = 1.0499e-02, PNorm = 176.2875, GNorm = 0.1518, lr_0 = 3.3311e-04
Loss = 1.5145e-02, PNorm = 176.3029, GNorm = 0.1410, lr_0 = 3.3288e-04
Loss = 1.4200e-02, PNorm = 176.3166, GNorm = 0.9042, lr_0 = 3.3265e-04
Loss = 8.8717e-03, PNorm = 176.3261, GNorm = 0.1956, lr_0 = 3.3243e-04
Loss = 1.1370e-02, PNorm = 176.3380, GNorm = 0.1636, lr_0 = 3.3220e-04
Loss = 1.0620e-02, PNorm = 176.3484, GNorm = 0.4936, lr_0 = 3.3197e-04
Loss = 1.2954e-02, PNorm = 176.3627, GNorm = 0.3579, lr_0 = 3.3174e-04
Loss = 1.0942e-02, PNorm = 176.3763, GNorm = 0.2335, lr_0 = 3.3152e-04
Loss = 1.1444e-02, PNorm = 176.3869, GNorm = 0.6621, lr_0 = 3.3129e-04
Loss = 9.9055e-03, PNorm = 176.3997, GNorm = 0.2654, lr_0 = 3.3106e-04
Loss = 9.6351e-03, PNorm = 176.4121, GNorm = 0.1501, lr_0 = 3.3084e-04
Loss = 1.3197e-02, PNorm = 176.4254, GNorm = 0.6437, lr_0 = 3.3061e-04
Loss = 1.4258e-02, PNorm = 176.4395, GNorm = 0.3530, lr_0 = 3.3038e-04
Loss = 1.2341e-02, PNorm = 176.4521, GNorm = 0.1191, lr_0 = 3.3016e-04
Loss = 1.2129e-02, PNorm = 176.4617, GNorm = 0.3830, lr_0 = 3.2993e-04
Loss = 9.0806e-03, PNorm = 176.4736, GNorm = 0.1384, lr_0 = 3.2970e-04
Loss = 1.7105e-02, PNorm = 176.4857, GNorm = 0.2370, lr_0 = 3.2948e-04
Loss = 8.6410e-03, PNorm = 176.4959, GNorm = 0.1442, lr_0 = 3.2925e-04
Loss = 9.9607e-03, PNorm = 176.5067, GNorm = 0.6812, lr_0 = 3.2903e-04
Loss = 1.2866e-02, PNorm = 176.5179, GNorm = 0.1489, lr_0 = 3.2880e-04
Loss = 8.7082e-03, PNorm = 176.5285, GNorm = 0.2051, lr_0 = 3.2858e-04
Loss = 9.6415e-03, PNorm = 176.5392, GNorm = 0.2879, lr_0 = 3.2835e-04
Loss = 1.1461e-02, PNorm = 176.5544, GNorm = 0.3132, lr_0 = 3.2813e-04
Loss = 1.1519e-02, PNorm = 176.5687, GNorm = 0.2179, lr_0 = 3.2790e-04
Loss = 1.2078e-02, PNorm = 176.5821, GNorm = 0.3019, lr_0 = 3.2768e-04
Loss = 1.2234e-02, PNorm = 176.5950, GNorm = 0.1714, lr_0 = 3.2745e-04
Loss = 8.5967e-03, PNorm = 176.6108, GNorm = 0.4345, lr_0 = 3.2723e-04
Loss = 1.2830e-02, PNorm = 176.6264, GNorm = 1.0384, lr_0 = 3.2700e-04
Loss = 1.0864e-02, PNorm = 176.6405, GNorm = 0.2901, lr_0 = 3.2678e-04
Loss = 1.3068e-02, PNorm = 176.6533, GNorm = 0.4197, lr_0 = 3.2656e-04
Loss = 1.0745e-02, PNorm = 176.6689, GNorm = 0.3306, lr_0 = 3.2633e-04
Loss = 1.0512e-02, PNorm = 176.6811, GNorm = 0.2757, lr_0 = 3.2611e-04
Loss = 1.1157e-02, PNorm = 176.6911, GNorm = 0.2277, lr_0 = 3.2589e-04
Loss = 1.0369e-02, PNorm = 176.7024, GNorm = 0.1248, lr_0 = 3.2566e-04
Loss = 1.5754e-02, PNorm = 176.7152, GNorm = 0.2736, lr_0 = 3.2544e-04
Loss = 2.9109e-02, PNorm = 176.7309, GNorm = 0.1926, lr_0 = 3.2522e-04
Loss = 1.5567e-02, PNorm = 176.7495, GNorm = 0.2286, lr_0 = 3.2499e-04
Loss = 9.1715e-03, PNorm = 176.7627, GNorm = 0.1472, lr_0 = 3.2477e-04
Loss = 1.3939e-02, PNorm = 176.7744, GNorm = 0.3631, lr_0 = 3.2455e-04
Loss = 1.4949e-02, PNorm = 176.7864, GNorm = 0.1290, lr_0 = 3.2433e-04
Loss = 1.1340e-02, PNorm = 176.7981, GNorm = 0.3408, lr_0 = 3.2410e-04
Loss = 9.3411e-03, PNorm = 176.8092, GNorm = 0.1648, lr_0 = 3.2388e-04
Loss = 1.2090e-02, PNorm = 176.8197, GNorm = 0.3169, lr_0 = 3.2366e-04
Loss = 9.0176e-03, PNorm = 176.8304, GNorm = 0.1478, lr_0 = 3.2344e-04
Loss = 1.0936e-02, PNorm = 176.8427, GNorm = 0.1489, lr_0 = 3.2322e-04
Loss = 1.0531e-02, PNorm = 176.8561, GNorm = 0.2981, lr_0 = 3.2300e-04
Loss = 1.2807e-02, PNorm = 176.8737, GNorm = 0.3370, lr_0 = 3.2277e-04
Loss = 1.3398e-02, PNorm = 176.8892, GNorm = 0.4209, lr_0 = 3.2255e-04
Loss = 6.9614e-03, PNorm = 176.9038, GNorm = 0.1533, lr_0 = 3.2233e-04
Loss = 1.6538e-02, PNorm = 176.9168, GNorm = 0.1458, lr_0 = 3.2211e-04
Loss = 1.0695e-02, PNorm = 176.9298, GNorm = 0.2184, lr_0 = 3.2189e-04
Loss = 1.6482e-02, PNorm = 176.9430, GNorm = 0.2805, lr_0 = 3.2167e-04
Loss = 9.9049e-03, PNorm = 176.9561, GNorm = 0.1895, lr_0 = 3.2145e-04
Loss = 9.7983e-03, PNorm = 176.9692, GNorm = 0.1500, lr_0 = 3.2123e-04
Loss = 1.4760e-02, PNorm = 176.9816, GNorm = 0.3441, lr_0 = 3.2101e-04
Loss = 1.1728e-02, PNorm = 176.9977, GNorm = 0.2229, lr_0 = 3.2079e-04
Loss = 1.4895e-02, PNorm = 177.0122, GNorm = 0.8421, lr_0 = 3.2057e-04
Loss = 1.3106e-02, PNorm = 177.0276, GNorm = 0.4987, lr_0 = 3.2035e-04
Loss = 1.7708e-02, PNorm = 177.0434, GNorm = 0.3202, lr_0 = 3.2013e-04
Loss = 1.0261e-02, PNorm = 177.0563, GNorm = 0.3630, lr_0 = 3.1991e-04
Loss = 1.2259e-02, PNorm = 177.0674, GNorm = 0.1657, lr_0 = 3.1969e-04
Loss = 1.2116e-02, PNorm = 177.0788, GNorm = 0.1516, lr_0 = 3.1947e-04
Loss = 1.3028e-02, PNorm = 177.0898, GNorm = 0.1130, lr_0 = 3.1925e-04
Loss = 1.1918e-02, PNorm = 177.1024, GNorm = 0.1409, lr_0 = 3.1904e-04
Loss = 1.6730e-02, PNorm = 177.1159, GNorm = 0.2542, lr_0 = 3.1882e-04
Loss = 1.1790e-02, PNorm = 177.1270, GNorm = 0.3066, lr_0 = 3.1860e-04
Loss = 1.0500e-02, PNorm = 177.1401, GNorm = 0.2285, lr_0 = 3.1838e-04
Loss = 1.0508e-02, PNorm = 177.1535, GNorm = 0.3960, lr_0 = 3.1816e-04
Loss = 8.6683e-03, PNorm = 177.1657, GNorm = 0.3170, lr_0 = 3.1794e-04
Loss = 1.1297e-02, PNorm = 177.1782, GNorm = 0.1357, lr_0 = 3.1773e-04
Loss = 1.1349e-02, PNorm = 177.1918, GNorm = 0.1104, lr_0 = 3.1751e-04
Loss = 9.8384e-03, PNorm = 177.2064, GNorm = 0.1440, lr_0 = 3.1729e-04
Loss = 1.6300e-02, PNorm = 177.2178, GNorm = 0.3599, lr_0 = 3.1707e-04
Loss = 9.5889e-03, PNorm = 177.2330, GNorm = 0.1133, lr_0 = 3.1686e-04
Loss = 1.1236e-02, PNorm = 177.2460, GNorm = 0.1458, lr_0 = 3.1664e-04
Loss = 9.3622e-03, PNorm = 177.2596, GNorm = 0.1104, lr_0 = 3.1642e-04
Loss = 1.1526e-02, PNorm = 177.2766, GNorm = 0.1193, lr_0 = 3.1621e-04
Validation mae = 0.121410
Epoch 16
Loss = 1.5440e-02, PNorm = 177.2924, GNorm = 0.1847, lr_0 = 3.1599e-04
Loss = 1.3877e-02, PNorm = 177.3080, GNorm = 0.2256, lr_0 = 3.1577e-04
Loss = 9.6874e-03, PNorm = 177.3171, GNorm = 0.2420, lr_0 = 3.1556e-04
Loss = 1.1690e-02, PNorm = 177.3249, GNorm = 0.3649, lr_0 = 3.1534e-04
Loss = 1.0279e-02, PNorm = 177.3348, GNorm = 0.2132, lr_0 = 3.1512e-04
Loss = 9.6405e-03, PNorm = 177.3450, GNorm = 0.2918, lr_0 = 3.1491e-04
Loss = 7.5287e-03, PNorm = 177.3534, GNorm = 0.1542, lr_0 = 3.1469e-04
Loss = 8.9586e-03, PNorm = 177.3634, GNorm = 0.1022, lr_0 = 3.1448e-04
Loss = 1.1214e-02, PNorm = 177.3737, GNorm = 0.2224, lr_0 = 3.1426e-04
Loss = 9.9328e-03, PNorm = 177.3841, GNorm = 0.2185, lr_0 = 3.1405e-04
Loss = 1.1282e-02, PNorm = 177.3954, GNorm = 0.1632, lr_0 = 3.1383e-04
Loss = 8.2293e-03, PNorm = 177.4090, GNorm = 0.3538, lr_0 = 3.1362e-04
Loss = 1.1274e-02, PNorm = 177.4174, GNorm = 0.2254, lr_0 = 3.1340e-04
Loss = 9.3563e-03, PNorm = 177.4291, GNorm = 0.1203, lr_0 = 3.1319e-04
Loss = 8.9149e-03, PNorm = 177.4411, GNorm = 0.5011, lr_0 = 3.1297e-04
Loss = 1.4452e-02, PNorm = 177.4517, GNorm = 0.2983, lr_0 = 3.1276e-04
Loss = 9.3589e-03, PNorm = 177.4589, GNorm = 0.1347, lr_0 = 3.1254e-04
Loss = 8.2151e-03, PNorm = 177.4688, GNorm = 0.2741, lr_0 = 3.1233e-04
Loss = 8.4273e-03, PNorm = 177.4783, GNorm = 0.1548, lr_0 = 3.1212e-04
Loss = 7.4104e-03, PNorm = 177.4854, GNorm = 0.3691, lr_0 = 3.1190e-04
Loss = 1.2642e-02, PNorm = 177.4922, GNorm = 0.2123, lr_0 = 3.1169e-04
Loss = 9.4622e-03, PNorm = 177.5019, GNorm = 0.2248, lr_0 = 3.1147e-04
Loss = 7.5929e-03, PNorm = 177.5098, GNorm = 0.2124, lr_0 = 3.1126e-04
Loss = 8.1848e-03, PNorm = 177.5189, GNorm = 0.2243, lr_0 = 3.1105e-04
Loss = 1.1372e-02, PNorm = 177.5305, GNorm = 0.1447, lr_0 = 3.1083e-04
Loss = 9.5786e-03, PNorm = 177.5430, GNorm = 0.1570, lr_0 = 3.1062e-04
Loss = 9.5786e-03, PNorm = 177.5557, GNorm = 0.2346, lr_0 = 3.1041e-04
Loss = 9.7331e-03, PNorm = 177.5666, GNorm = 0.2983, lr_0 = 3.1020e-04
Loss = 2.0620e-02, PNorm = 177.5743, GNorm = 0.2548, lr_0 = 3.0998e-04
Loss = 1.1131e-02, PNorm = 177.5848, GNorm = 0.1775, lr_0 = 3.0977e-04
Loss = 9.4411e-03, PNorm = 177.5977, GNorm = 0.2875, lr_0 = 3.0956e-04
Loss = 1.0994e-02, PNorm = 177.6097, GNorm = 0.2454, lr_0 = 3.0935e-04
Loss = 8.9210e-03, PNorm = 177.6200, GNorm = 0.2869, lr_0 = 3.0914e-04
Loss = 1.2061e-02, PNorm = 177.6301, GNorm = 0.8080, lr_0 = 3.0892e-04
Loss = 7.8665e-03, PNorm = 177.6395, GNorm = 0.1748, lr_0 = 3.0871e-04
Loss = 1.1340e-02, PNorm = 177.6542, GNorm = 0.2032, lr_0 = 3.0850e-04
Loss = 7.6975e-03, PNorm = 177.6667, GNorm = 0.1753, lr_0 = 3.0829e-04
Loss = 9.2466e-03, PNorm = 177.6748, GNorm = 0.1814, lr_0 = 3.0808e-04
Loss = 1.0005e-02, PNorm = 177.6850, GNorm = 0.1877, lr_0 = 3.0787e-04
Loss = 9.6056e-03, PNorm = 177.6922, GNorm = 0.2844, lr_0 = 3.0766e-04
Loss = 1.4534e-02, PNorm = 177.7001, GNorm = 0.2642, lr_0 = 3.0745e-04
Loss = 9.5262e-03, PNorm = 177.7111, GNorm = 0.2134, lr_0 = 3.0723e-04
Loss = 1.0249e-02, PNorm = 177.7201, GNorm = 0.2241, lr_0 = 3.0702e-04
Loss = 1.4118e-02, PNorm = 177.7298, GNorm = 0.5910, lr_0 = 3.0681e-04
Loss = 1.3395e-02, PNorm = 177.7419, GNorm = 0.2754, lr_0 = 3.0660e-04
Loss = 9.8144e-03, PNorm = 177.7513, GNorm = 0.2688, lr_0 = 3.0639e-04
Loss = 1.1398e-02, PNorm = 177.7622, GNorm = 0.3096, lr_0 = 3.0618e-04
Loss = 8.7582e-03, PNorm = 177.7717, GNorm = 0.4371, lr_0 = 3.0597e-04
Loss = 8.6742e-03, PNorm = 177.7828, GNorm = 0.2096, lr_0 = 3.0576e-04
Loss = 8.5422e-03, PNorm = 177.7928, GNorm = 0.3262, lr_0 = 3.0555e-04
Loss = 1.8803e-02, PNorm = 177.8043, GNorm = 0.5225, lr_0 = 3.0535e-04
Loss = 9.2365e-03, PNorm = 177.8133, GNorm = 0.2099, lr_0 = 3.0514e-04
Loss = 9.9881e-03, PNorm = 177.8256, GNorm = 0.2584, lr_0 = 3.0493e-04
Loss = 7.1400e-03, PNorm = 177.8396, GNorm = 0.1299, lr_0 = 3.0472e-04
Loss = 7.4979e-03, PNorm = 177.8527, GNorm = 0.1647, lr_0 = 3.0451e-04
Loss = 1.0581e-02, PNorm = 177.8644, GNorm = 0.3069, lr_0 = 3.0430e-04
Loss = 1.3542e-02, PNorm = 177.8741, GNorm = 0.0879, lr_0 = 3.0409e-04
Loss = 9.4539e-03, PNorm = 177.8837, GNorm = 0.1181, lr_0 = 3.0388e-04
Loss = 1.3319e-02, PNorm = 177.8937, GNorm = 0.1147, lr_0 = 3.0368e-04
Loss = 9.5247e-03, PNorm = 177.9056, GNorm = 0.3984, lr_0 = 3.0347e-04
Loss = 8.9078e-03, PNorm = 177.9149, GNorm = 0.3332, lr_0 = 3.0326e-04
Loss = 8.0359e-03, PNorm = 177.9266, GNorm = 0.3597, lr_0 = 3.0305e-04
Loss = 1.0018e-02, PNorm = 177.9371, GNorm = 0.6313, lr_0 = 3.0284e-04
Loss = 1.2604e-02, PNorm = 177.9417, GNorm = 0.2279, lr_0 = 3.0264e-04
Loss = 1.2108e-02, PNorm = 177.9531, GNorm = 0.3327, lr_0 = 3.0243e-04
Loss = 9.9581e-03, PNorm = 177.9623, GNorm = 0.2180, lr_0 = 3.0222e-04
Loss = 1.1894e-02, PNorm = 177.9706, GNorm = 0.5511, lr_0 = 3.0202e-04
Loss = 9.4004e-03, PNorm = 177.9822, GNorm = 0.3432, lr_0 = 3.0181e-04
Loss = 7.9083e-03, PNorm = 177.9938, GNorm = 0.3482, lr_0 = 3.0160e-04
Loss = 1.0317e-02, PNorm = 178.0076, GNorm = 0.3400, lr_0 = 3.0140e-04
Loss = 8.1097e-03, PNorm = 178.0233, GNorm = 0.3277, lr_0 = 3.0119e-04
Loss = 1.2709e-02, PNorm = 178.0378, GNorm = 0.1000, lr_0 = 3.0098e-04
Loss = 1.1763e-02, PNorm = 178.0500, GNorm = 0.3281, lr_0 = 3.0078e-04
Loss = 1.0449e-02, PNorm = 178.0571, GNorm = 0.3743, lr_0 = 3.0057e-04
Loss = 6.8199e-03, PNorm = 178.0673, GNorm = 0.1450, lr_0 = 3.0036e-04
Loss = 9.4860e-03, PNorm = 178.0737, GNorm = 0.2326, lr_0 = 3.0016e-04
Loss = 7.6398e-03, PNorm = 178.0793, GNorm = 0.0971, lr_0 = 2.9995e-04
Loss = 9.9829e-03, PNorm = 178.0895, GNorm = 0.2417, lr_0 = 2.9975e-04
Loss = 1.2434e-02, PNorm = 178.0995, GNorm = 0.1458, lr_0 = 2.9954e-04
Loss = 7.4340e-03, PNorm = 178.1098, GNorm = 0.2505, lr_0 = 2.9934e-04
Loss = 1.0013e-02, PNorm = 178.1209, GNorm = 0.5121, lr_0 = 2.9913e-04
Loss = 8.1229e-03, PNorm = 178.1300, GNorm = 0.1899, lr_0 = 2.9893e-04
Loss = 8.4325e-03, PNorm = 178.1396, GNorm = 0.2313, lr_0 = 2.9872e-04
Loss = 8.0453e-03, PNorm = 178.1494, GNorm = 0.2566, lr_0 = 2.9852e-04
Loss = 8.1101e-03, PNorm = 178.1579, GNorm = 0.2669, lr_0 = 2.9831e-04
Loss = 1.3101e-02, PNorm = 178.1666, GNorm = 0.1291, lr_0 = 2.9811e-04
Loss = 8.6830e-03, PNorm = 178.1793, GNorm = 0.1922, lr_0 = 2.9790e-04
Loss = 1.2989e-02, PNorm = 178.1916, GNorm = 0.3840, lr_0 = 2.9770e-04
Loss = 1.2552e-02, PNorm = 178.2067, GNorm = 0.4253, lr_0 = 2.9750e-04
Loss = 8.0722e-03, PNorm = 178.2169, GNorm = 0.1328, lr_0 = 2.9729e-04
Loss = 2.5013e-02, PNorm = 178.2298, GNorm = 2.9754, lr_0 = 2.9709e-04
Loss = 1.1043e-02, PNorm = 178.2425, GNorm = 0.2648, lr_0 = 2.9689e-04
Loss = 8.8947e-03, PNorm = 178.2521, GNorm = 0.1023, lr_0 = 2.9668e-04
Loss = 1.1338e-02, PNorm = 178.2662, GNorm = 0.3828, lr_0 = 2.9648e-04
Loss = 1.3737e-02, PNorm = 178.2766, GNorm = 0.0951, lr_0 = 2.9628e-04
Loss = 9.6786e-03, PNorm = 178.2904, GNorm = 0.1303, lr_0 = 2.9607e-04
Loss = 9.1487e-03, PNorm = 178.3031, GNorm = 0.1797, lr_0 = 2.9587e-04
Loss = 7.3514e-03, PNorm = 178.3186, GNorm = 0.1797, lr_0 = 2.9567e-04
Loss = 1.0006e-02, PNorm = 178.3325, GNorm = 0.2235, lr_0 = 2.9546e-04
Loss = 9.6749e-03, PNorm = 178.3417, GNorm = 0.1972, lr_0 = 2.9526e-04
Loss = 1.2445e-02, PNorm = 178.3565, GNorm = 0.1048, lr_0 = 2.9506e-04
Loss = 8.2921e-03, PNorm = 178.3693, GNorm = 0.3687, lr_0 = 2.9486e-04
Loss = 1.3027e-02, PNorm = 178.3794, GNorm = 0.4713, lr_0 = 2.9466e-04
Loss = 1.2642e-02, PNorm = 178.3923, GNorm = 0.1728, lr_0 = 2.9445e-04
Loss = 1.7598e-02, PNorm = 178.4023, GNorm = 0.4863, lr_0 = 2.9425e-04
Loss = 1.1162e-02, PNorm = 178.4121, GNorm = 0.2348, lr_0 = 2.9405e-04
Loss = 9.7074e-03, PNorm = 178.4224, GNorm = 0.2491, lr_0 = 2.9385e-04
Loss = 1.5954e-02, PNorm = 178.4344, GNorm = 0.2884, lr_0 = 2.9365e-04
Loss = 1.1902e-02, PNorm = 178.4471, GNorm = 0.1909, lr_0 = 2.9345e-04
Loss = 1.3120e-02, PNorm = 178.4615, GNorm = 0.1544, lr_0 = 2.9325e-04
Loss = 8.5138e-03, PNorm = 178.4761, GNorm = 0.1607, lr_0 = 2.9305e-04
Loss = 1.3773e-02, PNorm = 178.4862, GNorm = 0.2356, lr_0 = 2.9284e-04
Loss = 1.8631e-02, PNorm = 178.4981, GNorm = 0.3137, lr_0 = 2.9264e-04
Loss = 7.7896e-03, PNorm = 178.5117, GNorm = 0.4138, lr_0 = 2.9244e-04
Loss = 9.8504e-03, PNorm = 178.5238, GNorm = 0.1791, lr_0 = 2.9224e-04
Loss = 1.0952e-02, PNorm = 178.5374, GNorm = 0.3670, lr_0 = 2.9204e-04
Loss = 1.2861e-02, PNorm = 178.5483, GNorm = 0.1505, lr_0 = 2.9184e-04
Loss = 1.2563e-02, PNorm = 178.5596, GNorm = 0.1610, lr_0 = 2.9164e-04
Loss = 8.4529e-03, PNorm = 178.5731, GNorm = 0.1517, lr_0 = 2.9144e-04
Loss = 2.2386e-02, PNorm = 178.5865, GNorm = 1.8020, lr_0 = 2.9124e-04
Validation mae = 0.121656
Epoch 17
Loss = 9.7027e-03, PNorm = 178.5943, GNorm = 0.4057, lr_0 = 2.9104e-04
Loss = 9.4147e-03, PNorm = 178.6049, GNorm = 0.1460, lr_0 = 2.9084e-04
Loss = 9.8875e-03, PNorm = 178.6108, GNorm = 0.1989, lr_0 = 2.9065e-04
Loss = 1.0417e-02, PNorm = 178.6199, GNorm = 0.2340, lr_0 = 2.9045e-04
Loss = 9.6816e-03, PNorm = 178.6300, GNorm = 0.2383, lr_0 = 2.9025e-04
Loss = 8.9602e-03, PNorm = 178.6391, GNorm = 0.1847, lr_0 = 2.9005e-04
Loss = 9.6332e-03, PNorm = 178.6488, GNorm = 0.5667, lr_0 = 2.8985e-04
Loss = 1.3488e-02, PNorm = 178.6567, GNorm = 0.5262, lr_0 = 2.8965e-04
Loss = 9.1795e-03, PNorm = 178.6646, GNorm = 0.3140, lr_0 = 2.8945e-04
Loss = 1.3052e-02, PNorm = 178.6763, GNorm = 0.1807, lr_0 = 2.8925e-04
Loss = 7.4734e-03, PNorm = 178.6885, GNorm = 0.2641, lr_0 = 2.8906e-04
Loss = 8.3641e-03, PNorm = 178.6996, GNorm = 0.2685, lr_0 = 2.8886e-04
Loss = 1.0616e-02, PNorm = 178.7082, GNorm = 0.1909, lr_0 = 2.8866e-04
Loss = 1.0751e-02, PNorm = 178.7195, GNorm = 0.3099, lr_0 = 2.8846e-04
Loss = 9.2312e-03, PNorm = 178.7260, GNorm = 0.2199, lr_0 = 2.8826e-04
Loss = 1.0628e-02, PNorm = 178.7344, GNorm = 0.1618, lr_0 = 2.8807e-04
Loss = 1.0457e-02, PNorm = 178.7444, GNorm = 0.2218, lr_0 = 2.8787e-04
Loss = 1.4244e-02, PNorm = 178.7542, GNorm = 0.1660, lr_0 = 2.8767e-04
Loss = 9.7402e-03, PNorm = 178.7621, GNorm = 0.2709, lr_0 = 2.8748e-04
Loss = 7.9751e-03, PNorm = 178.7692, GNorm = 0.1330, lr_0 = 2.8728e-04
Loss = 8.5910e-03, PNorm = 178.7766, GNorm = 0.1424, lr_0 = 2.8708e-04
Loss = 8.5037e-03, PNorm = 178.7862, GNorm = 0.2421, lr_0 = 2.8689e-04
Loss = 7.0785e-03, PNorm = 178.7972, GNorm = 0.1246, lr_0 = 2.8669e-04
Loss = 8.5570e-03, PNorm = 178.8079, GNorm = 0.1271, lr_0 = 2.8649e-04
Loss = 8.4499e-03, PNorm = 178.8177, GNorm = 0.1882, lr_0 = 2.8630e-04
Loss = 7.7548e-03, PNorm = 178.8245, GNorm = 0.1573, lr_0 = 2.8610e-04
Loss = 9.8960e-03, PNorm = 178.8297, GNorm = 0.2421, lr_0 = 2.8590e-04
Loss = 1.0312e-02, PNorm = 178.8357, GNorm = 0.1217, lr_0 = 2.8571e-04
Loss = 1.0593e-02, PNorm = 178.8434, GNorm = 0.1182, lr_0 = 2.8551e-04
Loss = 8.6502e-03, PNorm = 178.8503, GNorm = 0.1731, lr_0 = 2.8532e-04
Loss = 8.4741e-03, PNorm = 178.8597, GNorm = 0.3370, lr_0 = 2.8512e-04
Loss = 7.3429e-03, PNorm = 178.8671, GNorm = 0.2068, lr_0 = 2.8493e-04
Loss = 6.5504e-03, PNorm = 178.8759, GNorm = 0.3286, lr_0 = 2.8473e-04
Loss = 7.9610e-03, PNorm = 178.8853, GNorm = 0.1561, lr_0 = 2.8454e-04
Loss = 1.1464e-02, PNorm = 178.8941, GNorm = 0.3157, lr_0 = 2.8434e-04
Loss = 8.1676e-03, PNorm = 178.9017, GNorm = 0.2427, lr_0 = 2.8415e-04
Loss = 1.3799e-02, PNorm = 178.9109, GNorm = 0.5047, lr_0 = 2.8395e-04
Loss = 1.3988e-02, PNorm = 178.9204, GNorm = 0.1986, lr_0 = 2.8376e-04
Loss = 7.0136e-03, PNorm = 178.9299, GNorm = 0.2539, lr_0 = 2.8356e-04
Loss = 9.7957e-03, PNorm = 178.9382, GNorm = 0.2373, lr_0 = 2.8337e-04
Loss = 1.1440e-02, PNorm = 178.9451, GNorm = 0.2980, lr_0 = 2.8317e-04
Loss = 8.5527e-03, PNorm = 178.9504, GNorm = 0.1485, lr_0 = 2.8298e-04
Loss = 1.0477e-02, PNorm = 178.9578, GNorm = 0.4003, lr_0 = 2.8279e-04
Loss = 7.0006e-03, PNorm = 178.9667, GNorm = 0.2158, lr_0 = 2.8259e-04
Loss = 9.0911e-03, PNorm = 178.9739, GNorm = 0.1468, lr_0 = 2.8240e-04
Loss = 7.2645e-03, PNorm = 178.9806, GNorm = 0.1958, lr_0 = 2.8221e-04
Loss = 1.2269e-02, PNorm = 178.9887, GNorm = 0.3060, lr_0 = 2.8201e-04
Loss = 6.9004e-03, PNorm = 178.9978, GNorm = 0.4564, lr_0 = 2.8182e-04
Loss = 7.7371e-03, PNorm = 179.0056, GNorm = 0.2176, lr_0 = 2.8163e-04
Loss = 9.6311e-03, PNorm = 179.0139, GNorm = 0.1797, lr_0 = 2.8143e-04
Loss = 7.1808e-03, PNorm = 179.0248, GNorm = 0.1922, lr_0 = 2.8124e-04
Loss = 6.9168e-03, PNorm = 179.0346, GNorm = 0.1792, lr_0 = 2.8105e-04
Loss = 8.9301e-03, PNorm = 179.0449, GNorm = 0.2044, lr_0 = 2.8085e-04
Loss = 9.2093e-03, PNorm = 179.0573, GNorm = 0.5036, lr_0 = 2.8066e-04
Loss = 1.0389e-02, PNorm = 179.0683, GNorm = 0.3683, lr_0 = 2.8047e-04
Loss = 1.4180e-02, PNorm = 179.0763, GNorm = 0.4538, lr_0 = 2.8028e-04
Loss = 1.4652e-02, PNorm = 179.0855, GNorm = 0.1510, lr_0 = 2.8009e-04
Loss = 8.4429e-03, PNorm = 179.0922, GNorm = 0.5507, lr_0 = 2.7989e-04
Loss = 6.5521e-03, PNorm = 179.0985, GNorm = 0.1060, lr_0 = 2.7970e-04
Loss = 1.0236e-02, PNorm = 179.1061, GNorm = 0.1968, lr_0 = 2.7951e-04
Loss = 1.1338e-02, PNorm = 179.1176, GNorm = 0.3208, lr_0 = 2.7932e-04
Loss = 1.0506e-02, PNorm = 179.1271, GNorm = 0.2425, lr_0 = 2.7913e-04
Loss = 1.2453e-02, PNorm = 179.1377, GNorm = 0.2608, lr_0 = 2.7894e-04
Loss = 6.9215e-03, PNorm = 179.1470, GNorm = 0.1435, lr_0 = 2.7875e-04
Loss = 8.9499e-03, PNorm = 179.1581, GNorm = 0.2492, lr_0 = 2.7855e-04
Loss = 8.1534e-03, PNorm = 179.1683, GNorm = 0.4247, lr_0 = 2.7836e-04
Loss = 5.9292e-03, PNorm = 179.1758, GNorm = 0.1646, lr_0 = 2.7817e-04
Loss = 9.4877e-03, PNorm = 179.1828, GNorm = 0.1202, lr_0 = 2.7798e-04
Loss = 7.1892e-03, PNorm = 179.1942, GNorm = 0.2774, lr_0 = 2.7779e-04
Loss = 1.0295e-02, PNorm = 179.2019, GNorm = 0.5715, lr_0 = 2.7760e-04
Loss = 7.5991e-03, PNorm = 179.2103, GNorm = 0.3011, lr_0 = 2.7741e-04
Loss = 9.1797e-03, PNorm = 179.2198, GNorm = 0.2525, lr_0 = 2.7722e-04
Loss = 8.8884e-03, PNorm = 179.2293, GNorm = 0.1311, lr_0 = 2.7703e-04
Loss = 7.2797e-03, PNorm = 179.2366, GNorm = 0.3101, lr_0 = 2.7684e-04
Loss = 6.8471e-03, PNorm = 179.2478, GNorm = 0.1920, lr_0 = 2.7665e-04
Loss = 6.7038e-03, PNorm = 179.2589, GNorm = 0.0968, lr_0 = 2.7646e-04
Loss = 9.7896e-03, PNorm = 179.2698, GNorm = 0.1761, lr_0 = 2.7627e-04
Loss = 7.0052e-03, PNorm = 179.2793, GNorm = 0.2135, lr_0 = 2.7608e-04
Loss = 6.8307e-03, PNorm = 179.2876, GNorm = 0.3357, lr_0 = 2.7590e-04
Loss = 1.1551e-02, PNorm = 179.2921, GNorm = 0.2661, lr_0 = 2.7571e-04
Loss = 7.5672e-03, PNorm = 179.2982, GNorm = 0.2290, lr_0 = 2.7552e-04
Loss = 1.0600e-02, PNorm = 179.3056, GNorm = 0.2553, lr_0 = 2.7533e-04
Loss = 8.7968e-03, PNorm = 179.3161, GNorm = 0.1596, lr_0 = 2.7514e-04
Loss = 1.0892e-02, PNorm = 179.3249, GNorm = 0.3065, lr_0 = 2.7495e-04
Loss = 1.0238e-02, PNorm = 179.3359, GNorm = 0.2015, lr_0 = 2.7476e-04
Loss = 6.9732e-03, PNorm = 179.3465, GNorm = 0.2225, lr_0 = 2.7457e-04
Loss = 7.9925e-03, PNorm = 179.3562, GNorm = 0.1661, lr_0 = 2.7439e-04
Loss = 6.4986e-03, PNorm = 179.3668, GNorm = 0.1791, lr_0 = 2.7420e-04
Loss = 1.3769e-02, PNorm = 179.3765, GNorm = 0.2813, lr_0 = 2.7401e-04
Loss = 9.7469e-03, PNorm = 179.3878, GNorm = 0.2344, lr_0 = 2.7382e-04
Loss = 8.4423e-03, PNorm = 179.3991, GNorm = 0.2970, lr_0 = 2.7364e-04
Loss = 7.3538e-03, PNorm = 179.4109, GNorm = 0.1690, lr_0 = 2.7345e-04
Loss = 1.6217e-02, PNorm = 179.4177, GNorm = 0.2930, lr_0 = 2.7326e-04
Loss = 7.3952e-03, PNorm = 179.4294, GNorm = 0.1783, lr_0 = 2.7307e-04
Loss = 2.0240e-02, PNorm = 179.4413, GNorm = 0.2373, lr_0 = 2.7289e-04
Loss = 1.0693e-02, PNorm = 179.4484, GNorm = 0.1322, lr_0 = 2.7270e-04
Loss = 1.2717e-02, PNorm = 179.4590, GNorm = 0.5024, lr_0 = 2.7251e-04
Loss = 9.0416e-03, PNorm = 179.4688, GNorm = 0.3279, lr_0 = 2.7233e-04
Loss = 7.7266e-03, PNorm = 179.4792, GNorm = 0.1435, lr_0 = 2.7214e-04
Loss = 6.8008e-03, PNorm = 179.4889, GNorm = 0.2706, lr_0 = 2.7195e-04
Loss = 8.4676e-03, PNorm = 179.4965, GNorm = 0.3390, lr_0 = 2.7177e-04
Loss = 1.1073e-02, PNorm = 179.5050, GNorm = 0.1763, lr_0 = 2.7158e-04
Loss = 1.2247e-02, PNorm = 179.5117, GNorm = 0.1522, lr_0 = 2.7139e-04
Loss = 1.2081e-02, PNorm = 179.5202, GNorm = 0.1236, lr_0 = 2.7121e-04
Loss = 1.4848e-02, PNorm = 179.5303, GNorm = 0.1107, lr_0 = 2.7102e-04
Loss = 1.2154e-02, PNorm = 179.5420, GNorm = 0.1230, lr_0 = 2.7084e-04
Loss = 6.9430e-03, PNorm = 179.5512, GNorm = 0.3783, lr_0 = 2.7065e-04
Loss = 7.2864e-03, PNorm = 179.5628, GNorm = 0.2237, lr_0 = 2.7047e-04
Loss = 1.3646e-02, PNorm = 179.5734, GNorm = 0.1702, lr_0 = 2.7028e-04
Loss = 1.1706e-02, PNorm = 179.5813, GNorm = 0.1831, lr_0 = 2.7010e-04
Loss = 1.1071e-02, PNorm = 179.5910, GNorm = 0.3276, lr_0 = 2.6991e-04
Loss = 6.0809e-03, PNorm = 179.5996, GNorm = 0.3499, lr_0 = 2.6973e-04
Loss = 1.3691e-02, PNorm = 179.6092, GNorm = 0.5226, lr_0 = 2.6954e-04
Loss = 1.3385e-02, PNorm = 179.6249, GNorm = 1.1651, lr_0 = 2.6936e-04
Loss = 1.3622e-02, PNorm = 179.6363, GNorm = 0.1990, lr_0 = 2.6917e-04
Loss = 9.3980e-03, PNorm = 179.6502, GNorm = 0.1658, lr_0 = 2.6899e-04
Loss = 8.1233e-03, PNorm = 179.6615, GNorm = 0.3444, lr_0 = 2.6880e-04
Loss = 7.0210e-03, PNorm = 179.6696, GNorm = 0.1003, lr_0 = 2.6862e-04
Loss = 7.6260e-03, PNorm = 179.6802, GNorm = 0.1910, lr_0 = 2.6844e-04
Loss = 7.1276e-03, PNorm = 179.6905, GNorm = 0.1727, lr_0 = 2.6825e-04
Validation mae = 0.121294
Epoch 18
Loss = 7.3634e-03, PNorm = 179.6999, GNorm = 0.1414, lr_0 = 2.6807e-04
Loss = 7.1572e-03, PNorm = 179.7062, GNorm = 0.1836, lr_0 = 2.6788e-04
Loss = 8.8540e-03, PNorm = 179.7124, GNorm = 0.1855, lr_0 = 2.6770e-04
Loss = 5.8864e-03, PNorm = 179.7175, GNorm = 0.1734, lr_0 = 2.6752e-04
Loss = 6.5496e-03, PNorm = 179.7239, GNorm = 0.1418, lr_0 = 2.6733e-04
Loss = 5.7547e-03, PNorm = 179.7298, GNorm = 0.1145, lr_0 = 2.6715e-04
Loss = 6.9161e-03, PNorm = 179.7378, GNorm = 0.2681, lr_0 = 2.6697e-04
Loss = 6.6709e-03, PNorm = 179.7465, GNorm = 0.1322, lr_0 = 2.6678e-04
Loss = 6.6411e-03, PNorm = 179.7549, GNorm = 0.2671, lr_0 = 2.6660e-04
Loss = 8.0704e-03, PNorm = 179.7602, GNorm = 0.1359, lr_0 = 2.6642e-04
Loss = 6.0033e-03, PNorm = 179.7701, GNorm = 0.2750, lr_0 = 2.6624e-04
Loss = 6.8390e-03, PNorm = 179.7779, GNorm = 0.1943, lr_0 = 2.6605e-04
Loss = 8.5116e-03, PNorm = 179.7866, GNorm = 0.1005, lr_0 = 2.6587e-04
Loss = 6.1797e-03, PNorm = 179.7941, GNorm = 0.2747, lr_0 = 2.6569e-04
Loss = 9.9197e-03, PNorm = 179.8012, GNorm = 0.1744, lr_0 = 2.6551e-04
Loss = 6.1718e-03, PNorm = 179.8076, GNorm = 0.1282, lr_0 = 2.6533e-04
Loss = 8.0784e-03, PNorm = 179.8135, GNorm = 0.3768, lr_0 = 2.6514e-04
Loss = 6.3063e-03, PNorm = 179.8192, GNorm = 0.1169, lr_0 = 2.6496e-04
Loss = 1.4584e-02, PNorm = 179.8279, GNorm = 0.1598, lr_0 = 2.6478e-04
Loss = 8.4468e-03, PNorm = 179.8350, GNorm = 0.1087, lr_0 = 2.6460e-04
Loss = 1.2906e-02, PNorm = 179.8383, GNorm = 0.4412, lr_0 = 2.6442e-04
Loss = 7.1569e-03, PNorm = 179.8466, GNorm = 0.1403, lr_0 = 2.6424e-04
Loss = 1.1614e-02, PNorm = 179.8538, GNorm = 0.3337, lr_0 = 2.6406e-04
Loss = 6.4825e-03, PNorm = 179.8601, GNorm = 0.1362, lr_0 = 2.6388e-04
Loss = 8.1736e-03, PNorm = 179.8695, GNorm = 0.2923, lr_0 = 2.6369e-04
Loss = 6.3253e-03, PNorm = 179.8751, GNorm = 0.1953, lr_0 = 2.6351e-04
Loss = 6.2056e-03, PNorm = 179.8820, GNorm = 0.1716, lr_0 = 2.6333e-04
Loss = 6.0084e-03, PNorm = 179.8886, GNorm = 0.0888, lr_0 = 2.6315e-04
Loss = 8.2921e-03, PNorm = 179.8962, GNorm = 0.1083, lr_0 = 2.6297e-04
Loss = 8.9519e-03, PNorm = 179.9039, GNorm = 0.1779, lr_0 = 2.6279e-04
Loss = 7.8048e-03, PNorm = 179.9103, GNorm = 0.3870, lr_0 = 2.6261e-04
Loss = 9.9333e-03, PNorm = 179.9164, GNorm = 0.3551, lr_0 = 2.6243e-04
Loss = 6.8018e-03, PNorm = 179.9230, GNorm = 0.2561, lr_0 = 2.6225e-04
Loss = 1.1777e-02, PNorm = 179.9279, GNorm = 0.3031, lr_0 = 2.6207e-04
Loss = 7.3887e-03, PNorm = 179.9355, GNorm = 0.3330, lr_0 = 2.6189e-04
Loss = 7.4925e-03, PNorm = 179.9435, GNorm = 0.4005, lr_0 = 2.6171e-04
Loss = 1.2313e-02, PNorm = 179.9521, GNorm = 0.2140, lr_0 = 2.6153e-04
Loss = 6.8583e-03, PNorm = 179.9576, GNorm = 0.1240, lr_0 = 2.6136e-04
Loss = 1.2158e-02, PNorm = 179.9641, GNorm = 0.2459, lr_0 = 2.6118e-04
Loss = 6.4643e-03, PNorm = 179.9730, GNorm = 0.2031, lr_0 = 2.6100e-04
Loss = 1.1797e-02, PNorm = 179.9810, GNorm = 0.2510, lr_0 = 2.6082e-04
Loss = 1.0667e-02, PNorm = 179.9881, GNorm = 0.5904, lr_0 = 2.6064e-04
Loss = 7.2386e-03, PNorm = 179.9937, GNorm = 0.2086, lr_0 = 2.6046e-04
Loss = 7.5155e-03, PNorm = 180.0008, GNorm = 0.2565, lr_0 = 2.6028e-04
Loss = 5.4216e-03, PNorm = 180.0108, GNorm = 0.2626, lr_0 = 2.6011e-04
Loss = 8.3544e-03, PNorm = 180.0180, GNorm = 0.5531, lr_0 = 2.5993e-04
Loss = 5.7644e-03, PNorm = 180.0246, GNorm = 0.1341, lr_0 = 2.5975e-04
Loss = 1.6496e-02, PNorm = 180.0281, GNorm = 0.4141, lr_0 = 2.5957e-04
Loss = 1.0300e-02, PNorm = 180.0374, GNorm = 0.3019, lr_0 = 2.5939e-04
Loss = 6.4319e-03, PNorm = 180.0423, GNorm = 0.2212, lr_0 = 2.5922e-04
Loss = 9.3343e-03, PNorm = 180.0511, GNorm = 0.4043, lr_0 = 2.5904e-04
Loss = 1.0855e-02, PNorm = 180.0663, GNorm = 0.1953, lr_0 = 2.5886e-04
Loss = 6.4715e-03, PNorm = 180.0767, GNorm = 0.3833, lr_0 = 2.5868e-04
Loss = 8.8367e-03, PNorm = 180.0869, GNorm = 0.1165, lr_0 = 2.5851e-04
Loss = 6.9425e-03, PNorm = 180.0957, GNorm = 0.1021, lr_0 = 2.5833e-04
Loss = 7.4398e-03, PNorm = 180.1017, GNorm = 0.2639, lr_0 = 2.5815e-04
Loss = 6.1486e-03, PNorm = 180.1097, GNorm = 0.1833, lr_0 = 2.5797e-04
Loss = 1.2590e-02, PNorm = 180.1161, GNorm = 0.4487, lr_0 = 2.5780e-04
Loss = 5.8858e-03, PNorm = 180.1212, GNorm = 0.1468, lr_0 = 2.5762e-04
Loss = 6.1771e-03, PNorm = 180.1272, GNorm = 0.1856, lr_0 = 2.5745e-04
Loss = 7.4109e-03, PNorm = 180.1337, GNorm = 0.3821, lr_0 = 2.5727e-04
Loss = 1.0549e-02, PNorm = 180.1432, GNorm = 0.1769, lr_0 = 2.5709e-04
Loss = 6.2793e-03, PNorm = 180.1501, GNorm = 0.1096, lr_0 = 2.5692e-04
Loss = 1.1223e-02, PNorm = 180.1569, GNorm = 0.2087, lr_0 = 2.5674e-04
Loss = 6.4315e-03, PNorm = 180.1630, GNorm = 0.3460, lr_0 = 2.5656e-04
Loss = 5.7208e-03, PNorm = 180.1694, GNorm = 0.1935, lr_0 = 2.5639e-04
Loss = 8.2063e-03, PNorm = 180.1760, GNorm = 0.7246, lr_0 = 2.5621e-04
Loss = 5.3543e-03, PNorm = 180.1808, GNorm = 0.3107, lr_0 = 2.5604e-04
Loss = 7.4696e-03, PNorm = 180.1877, GNorm = 0.2467, lr_0 = 2.5586e-04
Loss = 1.3557e-02, PNorm = 180.1950, GNorm = 0.2127, lr_0 = 2.5569e-04
Loss = 5.8946e-03, PNorm = 180.2025, GNorm = 0.1465, lr_0 = 2.5551e-04
Loss = 1.2055e-02, PNorm = 180.2110, GNorm = 0.3500, lr_0 = 2.5534e-04
Loss = 1.0745e-02, PNorm = 180.2206, GNorm = 0.1228, lr_0 = 2.5516e-04
Loss = 5.9344e-03, PNorm = 180.2297, GNorm = 0.1718, lr_0 = 2.5499e-04
Loss = 6.2316e-03, PNorm = 180.2371, GNorm = 0.2883, lr_0 = 2.5481e-04
Loss = 6.5206e-03, PNorm = 180.2474, GNorm = 0.1072, lr_0 = 2.5464e-04
Loss = 8.9729e-03, PNorm = 180.2566, GNorm = 0.2012, lr_0 = 2.5446e-04
Loss = 8.2907e-03, PNorm = 180.2643, GNorm = 0.1124, lr_0 = 2.5429e-04
Loss = 5.9052e-03, PNorm = 180.2724, GNorm = 0.1389, lr_0 = 2.5411e-04
Loss = 1.1309e-02, PNorm = 180.2804, GNorm = 0.2342, lr_0 = 2.5394e-04
Loss = 6.4811e-03, PNorm = 180.2873, GNorm = 0.2363, lr_0 = 2.5377e-04
Loss = 1.1115e-02, PNorm = 180.2928, GNorm = 0.1208, lr_0 = 2.5359e-04
Loss = 8.0772e-03, PNorm = 180.2976, GNorm = 0.4086, lr_0 = 2.5342e-04
Loss = 7.2272e-03, PNorm = 180.3052, GNorm = 0.2064, lr_0 = 2.5325e-04
Loss = 1.1089e-02, PNorm = 180.3138, GNorm = 0.2539, lr_0 = 2.5307e-04
Loss = 5.4383e-03, PNorm = 180.3221, GNorm = 0.1317, lr_0 = 2.5290e-04
Loss = 1.0442e-02, PNorm = 180.3290, GNorm = 0.1973, lr_0 = 2.5273e-04
Loss = 7.9075e-03, PNorm = 180.3379, GNorm = 0.1960, lr_0 = 2.5255e-04
Loss = 6.0842e-03, PNorm = 180.3444, GNorm = 0.2100, lr_0 = 2.5238e-04
Loss = 9.7225e-03, PNorm = 180.3527, GNorm = 0.1812, lr_0 = 2.5221e-04
Loss = 4.9075e-03, PNorm = 180.3601, GNorm = 0.4673, lr_0 = 2.5203e-04
Loss = 9.5007e-03, PNorm = 180.3683, GNorm = 0.1775, lr_0 = 2.5186e-04
Loss = 6.5421e-03, PNorm = 180.3723, GNorm = 0.1040, lr_0 = 2.5169e-04
Loss = 5.5528e-03, PNorm = 180.3772, GNorm = 0.2200, lr_0 = 2.5152e-04
Loss = 7.0455e-03, PNorm = 180.3836, GNorm = 0.1296, lr_0 = 2.5134e-04
Loss = 1.4320e-02, PNorm = 180.3891, GNorm = 0.2436, lr_0 = 2.5117e-04
Loss = 7.0670e-03, PNorm = 180.4006, GNorm = 0.5158, lr_0 = 2.5100e-04
Loss = 1.2926e-02, PNorm = 180.4141, GNorm = 0.4065, lr_0 = 2.5083e-04
Loss = 1.1238e-02, PNorm = 180.4221, GNorm = 0.1161, lr_0 = 2.5066e-04
Loss = 6.8442e-03, PNorm = 180.4288, GNorm = 0.1430, lr_0 = 2.5048e-04
Loss = 5.4378e-03, PNorm = 180.4377, GNorm = 0.1414, lr_0 = 2.5031e-04
Loss = 6.2258e-03, PNorm = 180.4478, GNorm = 0.1370, lr_0 = 2.5014e-04
Loss = 1.2811e-02, PNorm = 180.4590, GNorm = 0.0856, lr_0 = 2.4997e-04
Loss = 7.5875e-03, PNorm = 180.4689, GNorm = 0.1656, lr_0 = 2.4980e-04
Loss = 5.7904e-03, PNorm = 180.4790, GNorm = 0.2105, lr_0 = 2.4963e-04
Loss = 6.5357e-03, PNorm = 180.4873, GNorm = 0.1855, lr_0 = 2.4946e-04
Loss = 6.4083e-03, PNorm = 180.4934, GNorm = 0.1260, lr_0 = 2.4929e-04
Loss = 9.3152e-03, PNorm = 180.5026, GNorm = 0.1019, lr_0 = 2.4911e-04
Loss = 1.2483e-02, PNorm = 180.5123, GNorm = 0.3428, lr_0 = 2.4894e-04
Loss = 7.2976e-03, PNorm = 180.5217, GNorm = 0.1840, lr_0 = 2.4877e-04
Loss = 9.3318e-03, PNorm = 180.5296, GNorm = 0.0970, lr_0 = 2.4860e-04
Loss = 9.9946e-03, PNorm = 180.5372, GNorm = 0.1056, lr_0 = 2.4843e-04
Loss = 5.0553e-03, PNorm = 180.5436, GNorm = 0.0897, lr_0 = 2.4826e-04
Loss = 5.4848e-03, PNorm = 180.5491, GNorm = 0.2718, lr_0 = 2.4809e-04
Loss = 6.3922e-03, PNorm = 180.5556, GNorm = 0.0895, lr_0 = 2.4792e-04
Loss = 6.1249e-03, PNorm = 180.5644, GNorm = 0.1240, lr_0 = 2.4775e-04
Loss = 5.1876e-03, PNorm = 180.5728, GNorm = 0.2556, lr_0 = 2.4758e-04
Loss = 1.2358e-02, PNorm = 180.5788, GNorm = 0.2409, lr_0 = 2.4741e-04
Loss = 5.2140e-03, PNorm = 180.5840, GNorm = 0.2019, lr_0 = 2.4724e-04
Loss = 9.2858e-03, PNorm = 180.5902, GNorm = 0.1718, lr_0 = 2.4707e-04
Validation mae = 0.121508
Epoch 19
Loss = 1.0619e-02, PNorm = 180.5942, GNorm = 0.2633, lr_0 = 2.4690e-04
Loss = 7.0108e-03, PNorm = 180.5997, GNorm = 0.1053, lr_0 = 2.4674e-04
Loss = 6.4755e-03, PNorm = 180.6053, GNorm = 0.1609, lr_0 = 2.4657e-04
Loss = 5.2334e-03, PNorm = 180.6117, GNorm = 0.2824, lr_0 = 2.4640e-04
Loss = 1.0500e-02, PNorm = 180.6175, GNorm = 0.1650, lr_0 = 2.4623e-04
Loss = 5.1771e-03, PNorm = 180.6214, GNorm = 0.1441, lr_0 = 2.4606e-04
Loss = 5.8881e-03, PNorm = 180.6292, GNorm = 0.2008, lr_0 = 2.4589e-04
Loss = 5.6248e-03, PNorm = 180.6358, GNorm = 0.1637, lr_0 = 2.4572e-04
Loss = 5.5762e-03, PNorm = 180.6419, GNorm = 0.1784, lr_0 = 2.4556e-04
Loss = 5.6402e-03, PNorm = 180.6496, GNorm = 0.1237, lr_0 = 2.4539e-04
Loss = 5.4737e-03, PNorm = 180.6559, GNorm = 0.0864, lr_0 = 2.4522e-04
Loss = 4.7611e-03, PNorm = 180.6600, GNorm = 0.2003, lr_0 = 2.4505e-04
Loss = 6.0075e-03, PNorm = 180.6660, GNorm = 0.7740, lr_0 = 2.4488e-04
Loss = 5.8843e-03, PNorm = 180.6710, GNorm = 0.1310, lr_0 = 2.4472e-04
Loss = 4.2603e-03, PNorm = 180.6743, GNorm = 0.1529, lr_0 = 2.4455e-04
Loss = 5.8147e-03, PNorm = 180.6794, GNorm = 0.2740, lr_0 = 2.4438e-04
Loss = 4.9444e-03, PNorm = 180.6852, GNorm = 0.1196, lr_0 = 2.4421e-04
Loss = 7.7119e-03, PNorm = 180.6914, GNorm = 0.9516, lr_0 = 2.4405e-04
Loss = 5.4348e-03, PNorm = 180.6964, GNorm = 0.0951, lr_0 = 2.4388e-04
Loss = 8.4436e-03, PNorm = 180.7068, GNorm = 0.2351, lr_0 = 2.4371e-04
Loss = 9.3454e-03, PNorm = 180.7140, GNorm = 0.1264, lr_0 = 2.4354e-04
Loss = 7.0335e-03, PNorm = 180.7204, GNorm = 0.1820, lr_0 = 2.4338e-04
Loss = 6.4098e-03, PNorm = 180.7254, GNorm = 0.1166, lr_0 = 2.4321e-04
Loss = 9.3289e-03, PNorm = 180.7309, GNorm = 0.1274, lr_0 = 2.4304e-04
Loss = 5.5475e-03, PNorm = 180.7365, GNorm = 0.0924, lr_0 = 2.4288e-04
Loss = 5.1764e-03, PNorm = 180.7417, GNorm = 0.1799, lr_0 = 2.4271e-04
Loss = 8.0865e-03, PNorm = 180.7470, GNorm = 0.1381, lr_0 = 2.4254e-04
Loss = 5.1061e-03, PNorm = 180.7526, GNorm = 0.4818, lr_0 = 2.4238e-04
Loss = 5.4626e-03, PNorm = 180.7576, GNorm = 0.2000, lr_0 = 2.4221e-04
Loss = 6.1115e-03, PNorm = 180.7621, GNorm = 0.1832, lr_0 = 2.4205e-04
Loss = 8.4955e-03, PNorm = 180.7684, GNorm = 0.2154, lr_0 = 2.4188e-04
Loss = 5.4158e-03, PNorm = 180.7756, GNorm = 0.2161, lr_0 = 2.4171e-04
Loss = 6.5267e-03, PNorm = 180.7805, GNorm = 0.2853, lr_0 = 2.4155e-04
Loss = 8.4530e-03, PNorm = 180.7875, GNorm = 0.2534, lr_0 = 2.4138e-04
Loss = 3.8518e-03, PNorm = 180.7926, GNorm = 0.1605, lr_0 = 2.4122e-04
Loss = 5.0787e-03, PNorm = 180.7983, GNorm = 0.1811, lr_0 = 2.4105e-04
Loss = 4.9342e-03, PNorm = 180.8020, GNorm = 0.0973, lr_0 = 2.4089e-04
Loss = 5.2444e-03, PNorm = 180.8059, GNorm = 0.1962, lr_0 = 2.4072e-04
Loss = 5.3404e-03, PNorm = 180.8107, GNorm = 0.1312, lr_0 = 2.4056e-04
Loss = 9.7208e-03, PNorm = 180.8159, GNorm = 0.2093, lr_0 = 2.4039e-04
Loss = 4.6834e-03, PNorm = 180.8198, GNorm = 0.1668, lr_0 = 2.4023e-04
Loss = 6.8799e-03, PNorm = 180.8260, GNorm = 0.2428, lr_0 = 2.4006e-04
Loss = 4.7274e-03, PNorm = 180.8333, GNorm = 0.2001, lr_0 = 2.3990e-04
Loss = 6.1007e-03, PNorm = 180.8431, GNorm = 0.1272, lr_0 = 2.3974e-04
Loss = 7.9878e-03, PNorm = 180.8507, GNorm = 0.1029, lr_0 = 2.3957e-04
Loss = 7.3925e-03, PNorm = 180.8557, GNorm = 0.3532, lr_0 = 2.3941e-04
Loss = 5.5126e-03, PNorm = 180.8647, GNorm = 0.1519, lr_0 = 2.3924e-04
Loss = 9.5199e-03, PNorm = 180.8740, GNorm = 0.1728, lr_0 = 2.3908e-04
Loss = 5.8423e-03, PNorm = 180.8804, GNorm = 0.1555, lr_0 = 2.3892e-04
Loss = 7.2849e-03, PNorm = 180.8859, GNorm = 0.0846, lr_0 = 2.3875e-04
Loss = 8.9829e-03, PNorm = 180.8913, GNorm = 0.1678, lr_0 = 2.3859e-04
Loss = 8.0163e-03, PNorm = 180.8983, GNorm = 0.5185, lr_0 = 2.3842e-04
Loss = 6.3536e-03, PNorm = 180.9064, GNorm = 0.2935, lr_0 = 2.3826e-04
Loss = 4.5919e-03, PNorm = 180.9108, GNorm = 0.2158, lr_0 = 2.3810e-04
Loss = 6.3614e-03, PNorm = 180.9179, GNorm = 0.1225, lr_0 = 2.3794e-04
Loss = 5.8265e-03, PNorm = 180.9232, GNorm = 0.1301, lr_0 = 2.3777e-04
Loss = 7.9452e-03, PNorm = 180.9309, GNorm = 0.1563, lr_0 = 2.3761e-04
Loss = 1.4184e-02, PNorm = 180.9362, GNorm = 0.1674, lr_0 = 2.3745e-04
Loss = 1.2456e-02, PNorm = 180.9401, GNorm = 0.2721, lr_0 = 2.3728e-04
Loss = 1.0222e-02, PNorm = 180.9465, GNorm = 0.1743, lr_0 = 2.3712e-04
Loss = 3.8580e-03, PNorm = 180.9512, GNorm = 0.1144, lr_0 = 2.3696e-04
Loss = 1.1143e-02, PNorm = 180.9585, GNorm = 0.1080, lr_0 = 2.3680e-04
Loss = 9.2272e-03, PNorm = 180.9627, GNorm = 0.5195, lr_0 = 2.3663e-04
Loss = 8.8700e-03, PNorm = 180.9707, GNorm = 0.4568, lr_0 = 2.3647e-04
Loss = 4.9061e-03, PNorm = 180.9763, GNorm = 0.1475, lr_0 = 2.3631e-04
Loss = 3.9191e-03, PNorm = 180.9824, GNorm = 0.1159, lr_0 = 2.3615e-04
Loss = 5.6771e-03, PNorm = 180.9877, GNorm = 0.3349, lr_0 = 2.3599e-04
Loss = 5.1515e-03, PNorm = 180.9941, GNorm = 0.2759, lr_0 = 2.3582e-04
Loss = 4.8733e-03, PNorm = 181.0019, GNorm = 0.1820, lr_0 = 2.3566e-04
Loss = 5.3551e-03, PNorm = 181.0074, GNorm = 0.1487, lr_0 = 2.3550e-04
Loss = 7.2691e-03, PNorm = 181.0127, GNorm = 0.1751, lr_0 = 2.3534e-04
Loss = 1.4135e-02, PNorm = 181.0203, GNorm = 0.4827, lr_0 = 2.3518e-04
Loss = 5.7680e-03, PNorm = 181.0274, GNorm = 0.1810, lr_0 = 2.3502e-04
Loss = 4.9733e-03, PNorm = 181.0337, GNorm = 0.3167, lr_0 = 2.3486e-04
Loss = 2.0456e-02, PNorm = 181.0444, GNorm = 0.2210, lr_0 = 2.3470e-04
Loss = 1.1008e-02, PNorm = 181.0491, GNorm = 0.1538, lr_0 = 2.3454e-04
Loss = 9.7968e-03, PNorm = 181.0535, GNorm = 0.2683, lr_0 = 2.3437e-04
Loss = 8.4197e-03, PNorm = 181.0602, GNorm = 0.1975, lr_0 = 2.3421e-04
Loss = 6.6397e-03, PNorm = 181.0641, GNorm = 0.1130, lr_0 = 2.3405e-04
Loss = 7.4070e-03, PNorm = 181.0692, GNorm = 0.1722, lr_0 = 2.3389e-04
Loss = 4.9005e-03, PNorm = 181.0741, GNorm = 0.2250, lr_0 = 2.3373e-04
Loss = 5.1574e-03, PNorm = 181.0814, GNorm = 0.2077, lr_0 = 2.3357e-04
Loss = 6.6070e-03, PNorm = 181.0888, GNorm = 0.1637, lr_0 = 2.3341e-04
Loss = 6.9891e-03, PNorm = 181.0959, GNorm = 0.1939, lr_0 = 2.3325e-04
Loss = 8.5354e-03, PNorm = 181.1011, GNorm = 0.2229, lr_0 = 2.3309e-04
Loss = 9.1414e-03, PNorm = 181.1058, GNorm = 0.1973, lr_0 = 2.3293e-04
Loss = 1.1216e-02, PNorm = 181.1121, GNorm = 0.2836, lr_0 = 2.3277e-04
Loss = 7.3980e-03, PNorm = 181.1178, GNorm = 0.1364, lr_0 = 2.3261e-04
Loss = 4.4627e-03, PNorm = 181.1247, GNorm = 0.2116, lr_0 = 2.3246e-04
Loss = 6.1512e-03, PNorm = 181.1315, GNorm = 0.1400, lr_0 = 2.3230e-04
Loss = 6.9195e-03, PNorm = 181.1385, GNorm = 0.1120, lr_0 = 2.3214e-04
Loss = 9.3495e-03, PNorm = 181.1441, GNorm = 0.1109, lr_0 = 2.3198e-04
Loss = 8.3326e-03, PNorm = 181.1491, GNorm = 0.2597, lr_0 = 2.3182e-04
Loss = 4.3100e-03, PNorm = 181.1541, GNorm = 0.1202, lr_0 = 2.3166e-04
Loss = 1.3712e-02, PNorm = 181.1595, GNorm = 0.1014, lr_0 = 2.3150e-04
Loss = 4.8293e-03, PNorm = 181.1638, GNorm = 0.1216, lr_0 = 2.3134e-04
Loss = 8.0337e-03, PNorm = 181.1714, GNorm = 0.2401, lr_0 = 2.3118e-04
Loss = 4.4402e-03, PNorm = 181.1783, GNorm = 0.2833, lr_0 = 2.3103e-04
Loss = 5.9925e-03, PNorm = 181.1840, GNorm = 0.5010, lr_0 = 2.3087e-04
Loss = 8.3037e-03, PNorm = 181.1891, GNorm = 0.1491, lr_0 = 2.3071e-04
Loss = 6.0964e-03, PNorm = 181.1938, GNorm = 0.1764, lr_0 = 2.3055e-04
Loss = 7.6250e-03, PNorm = 181.1980, GNorm = 0.1827, lr_0 = 2.3039e-04
Loss = 6.4214e-03, PNorm = 181.2060, GNorm = 0.1504, lr_0 = 2.3024e-04
Loss = 7.4296e-03, PNorm = 181.2134, GNorm = 0.1456, lr_0 = 2.3008e-04
Loss = 1.0448e-02, PNorm = 181.2190, GNorm = 0.1544, lr_0 = 2.2992e-04
Loss = 5.1046e-03, PNorm = 181.2265, GNorm = 0.1590, lr_0 = 2.2976e-04
Loss = 1.0409e-02, PNorm = 181.2329, GNorm = 0.5427, lr_0 = 2.2961e-04
Loss = 5.6094e-03, PNorm = 181.2395, GNorm = 0.1252, lr_0 = 2.2945e-04
Loss = 7.0712e-03, PNorm = 181.2439, GNorm = 0.1551, lr_0 = 2.2929e-04
Loss = 1.2179e-02, PNorm = 181.2490, GNorm = 0.5975, lr_0 = 2.2913e-04
Loss = 8.4464e-03, PNorm = 181.2544, GNorm = 0.1926, lr_0 = 2.2898e-04
Loss = 5.2587e-03, PNorm = 181.2621, GNorm = 0.3674, lr_0 = 2.2882e-04
Loss = 5.3501e-03, PNorm = 181.2689, GNorm = 0.1778, lr_0 = 2.2866e-04
Loss = 8.5026e-03, PNorm = 181.2770, GNorm = 0.2488, lr_0 = 2.2851e-04
Loss = 8.4065e-03, PNorm = 181.2858, GNorm = 0.0986, lr_0 = 2.2835e-04
Loss = 7.8169e-03, PNorm = 181.2924, GNorm = 0.4173, lr_0 = 2.2819e-04
Loss = 9.0755e-03, PNorm = 181.2997, GNorm = 0.1555, lr_0 = 2.2804e-04
Loss = 4.5401e-03, PNorm = 181.3050, GNorm = 0.1950, lr_0 = 2.2788e-04
Loss = 6.2610e-03, PNorm = 181.3107, GNorm = 0.1765, lr_0 = 2.2773e-04
Loss = 5.8530e-03, PNorm = 181.3171, GNorm = 0.1852, lr_0 = 2.2757e-04
Validation mae = 0.121162
Epoch 20
Loss = 5.2446e-03, PNorm = 181.3219, GNorm = 0.2633, lr_0 = 2.2741e-04
Loss = 7.1506e-03, PNorm = 181.3274, GNorm = 0.1861, lr_0 = 2.2726e-04
Loss = 7.1607e-03, PNorm = 181.3329, GNorm = 0.3069, lr_0 = 2.2710e-04
Loss = 8.4330e-03, PNorm = 181.3364, GNorm = 0.7575, lr_0 = 2.2695e-04
Loss = 8.9862e-03, PNorm = 181.3425, GNorm = 0.1620, lr_0 = 2.2679e-04
Loss = 5.0197e-03, PNorm = 181.3476, GNorm = 0.1571, lr_0 = 2.2664e-04
Loss = 4.2517e-03, PNorm = 181.3538, GNorm = 0.2325, lr_0 = 2.2648e-04
Loss = 8.7487e-03, PNorm = 181.3582, GNorm = 0.1256, lr_0 = 2.2632e-04
Loss = 4.5869e-03, PNorm = 181.3624, GNorm = 0.1046, lr_0 = 2.2617e-04
Loss = 4.7093e-03, PNorm = 181.3669, GNorm = 0.2657, lr_0 = 2.2601e-04
Loss = 6.4960e-03, PNorm = 181.3740, GNorm = 0.1326, lr_0 = 2.2586e-04
Loss = 9.2057e-03, PNorm = 181.3799, GNorm = 0.1389, lr_0 = 2.2571e-04
Loss = 4.4854e-03, PNorm = 181.3854, GNorm = 0.1328, lr_0 = 2.2555e-04
Loss = 7.5740e-03, PNorm = 181.3908, GNorm = 0.1250, lr_0 = 2.2540e-04
Loss = 7.3246e-03, PNorm = 181.3981, GNorm = 0.0922, lr_0 = 2.2524e-04
Loss = 9.8944e-03, PNorm = 181.4043, GNorm = 0.1086, lr_0 = 2.2509e-04
Loss = 8.3932e-03, PNorm = 181.4113, GNorm = 0.2783, lr_0 = 2.2493e-04
Loss = 8.4234e-03, PNorm = 181.4172, GNorm = 0.3378, lr_0 = 2.2478e-04
Loss = 6.0733e-03, PNorm = 181.4203, GNorm = 0.2216, lr_0 = 2.2463e-04
Loss = 9.2707e-03, PNorm = 181.4261, GNorm = 0.0825, lr_0 = 2.2447e-04
Loss = 4.9656e-03, PNorm = 181.4306, GNorm = 0.0880, lr_0 = 2.2432e-04
Loss = 4.6661e-03, PNorm = 181.4345, GNorm = 0.3793, lr_0 = 2.2416e-04
Loss = 4.1637e-03, PNorm = 181.4390, GNorm = 0.0630, lr_0 = 2.2401e-04
Loss = 6.1811e-03, PNorm = 181.4428, GNorm = 0.2245, lr_0 = 2.2386e-04
Loss = 3.7606e-03, PNorm = 181.4479, GNorm = 0.2024, lr_0 = 2.2370e-04
Loss = 4.9216e-03, PNorm = 181.4529, GNorm = 0.1887, lr_0 = 2.2355e-04
Loss = 6.2810e-03, PNorm = 181.4558, GNorm = 0.1204, lr_0 = 2.2340e-04
Loss = 7.8502e-03, PNorm = 181.4590, GNorm = 0.1175, lr_0 = 2.2324e-04
Loss = 7.8385e-03, PNorm = 181.4649, GNorm = 0.1841, lr_0 = 2.2309e-04
Loss = 7.0309e-03, PNorm = 181.4696, GNorm = 0.2291, lr_0 = 2.2294e-04
Loss = 4.8345e-03, PNorm = 181.4750, GNorm = 0.2677, lr_0 = 2.2279e-04
Loss = 7.2239e-03, PNorm = 181.4810, GNorm = 0.3043, lr_0 = 2.2263e-04
Loss = 6.2212e-03, PNorm = 181.4861, GNorm = 0.2847, lr_0 = 2.2248e-04
Loss = 9.2794e-03, PNorm = 181.4941, GNorm = 0.9630, lr_0 = 2.2233e-04
Loss = 7.1805e-03, PNorm = 181.5012, GNorm = 0.1340, lr_0 = 2.2218e-04
Loss = 4.1395e-03, PNorm = 181.5067, GNorm = 0.1784, lr_0 = 2.2202e-04
Loss = 3.7911e-03, PNorm = 181.5115, GNorm = 0.3148, lr_0 = 2.2187e-04
Loss = 4.0044e-03, PNorm = 181.5151, GNorm = 0.3698, lr_0 = 2.2172e-04
Loss = 5.5174e-03, PNorm = 181.5168, GNorm = 0.2174, lr_0 = 2.2157e-04
Loss = 4.3740e-03, PNorm = 181.5214, GNorm = 0.3724, lr_0 = 2.2142e-04
Loss = 5.5779e-03, PNorm = 181.5262, GNorm = 0.1394, lr_0 = 2.2126e-04
Loss = 4.2640e-03, PNorm = 181.5322, GNorm = 0.1395, lr_0 = 2.2111e-04
Loss = 7.5872e-03, PNorm = 181.5359, GNorm = 0.2622, lr_0 = 2.2096e-04
Loss = 6.7846e-03, PNorm = 181.5395, GNorm = 0.1720, lr_0 = 2.2081e-04
Loss = 5.2617e-03, PNorm = 181.5439, GNorm = 0.1982, lr_0 = 2.2066e-04
Loss = 1.0422e-02, PNorm = 181.5483, GNorm = 0.1469, lr_0 = 2.2051e-04
Loss = 4.0487e-03, PNorm = 181.5540, GNorm = 0.0931, lr_0 = 2.2036e-04
Loss = 4.9151e-03, PNorm = 181.5591, GNorm = 0.3320, lr_0 = 2.2021e-04
Loss = 8.9273e-03, PNorm = 181.5626, GNorm = 1.6188, lr_0 = 2.2005e-04
Loss = 4.7571e-03, PNorm = 181.5675, GNorm = 0.2359, lr_0 = 2.1990e-04
Loss = 7.5087e-03, PNorm = 181.5755, GNorm = 0.1088, lr_0 = 2.1975e-04
Loss = 4.2328e-03, PNorm = 181.5817, GNorm = 0.1341, lr_0 = 2.1960e-04
Loss = 6.4442e-03, PNorm = 181.5878, GNorm = 0.1715, lr_0 = 2.1945e-04
Loss = 4.5126e-03, PNorm = 181.5938, GNorm = 0.1297, lr_0 = 2.1930e-04
Loss = 8.0600e-03, PNorm = 181.6002, GNorm = 0.7264, lr_0 = 2.1915e-04
Loss = 5.0427e-03, PNorm = 181.6055, GNorm = 0.0968, lr_0 = 2.1900e-04
Loss = 1.0794e-02, PNorm = 181.6109, GNorm = 0.1810, lr_0 = 2.1885e-04
Loss = 4.3902e-03, PNorm = 181.6157, GNorm = 0.2758, lr_0 = 2.1870e-04
Loss = 6.7186e-03, PNorm = 181.6225, GNorm = 0.0881, lr_0 = 2.1855e-04
Loss = 4.2050e-03, PNorm = 181.6283, GNorm = 0.1018, lr_0 = 2.1840e-04
Loss = 6.0408e-03, PNorm = 181.6349, GNorm = 0.1398, lr_0 = 2.1825e-04
Loss = 3.9382e-03, PNorm = 181.6397, GNorm = 0.1448, lr_0 = 2.1810e-04
Loss = 5.7991e-03, PNorm = 181.6454, GNorm = 0.2526, lr_0 = 2.1795e-04
Loss = 5.8994e-03, PNorm = 181.6503, GNorm = 0.4340, lr_0 = 2.1780e-04
Loss = 3.7064e-03, PNorm = 181.6545, GNorm = 0.2090, lr_0 = 2.1765e-04
Loss = 3.9922e-03, PNorm = 181.6606, GNorm = 0.1903, lr_0 = 2.1751e-04
Loss = 8.0892e-03, PNorm = 181.6660, GNorm = 0.1257, lr_0 = 2.1736e-04
Loss = 4.3547e-03, PNorm = 181.6734, GNorm = 0.3658, lr_0 = 2.1721e-04
Loss = 8.5677e-03, PNorm = 181.6770, GNorm = 0.1562, lr_0 = 2.1706e-04
Loss = 9.9408e-03, PNorm = 181.6849, GNorm = 0.0706, lr_0 = 2.1691e-04
Loss = 8.5640e-03, PNorm = 181.6916, GNorm = 0.1585, lr_0 = 2.1676e-04
Loss = 6.2371e-03, PNorm = 181.6968, GNorm = 0.2218, lr_0 = 2.1661e-04
Loss = 3.6901e-03, PNorm = 181.7025, GNorm = 0.2014, lr_0 = 2.1646e-04
Loss = 5.2341e-03, PNorm = 181.7074, GNorm = 0.1101, lr_0 = 2.1632e-04
Loss = 5.5968e-03, PNorm = 181.7136, GNorm = 0.2011, lr_0 = 2.1617e-04
Loss = 6.8321e-03, PNorm = 181.7194, GNorm = 0.1661, lr_0 = 2.1602e-04
Loss = 1.0838e-02, PNorm = 181.7263, GNorm = 0.1783, lr_0 = 2.1587e-04
Loss = 5.2434e-03, PNorm = 181.7298, GNorm = 0.1876, lr_0 = 2.1572e-04
Loss = 6.4892e-03, PNorm = 181.7370, GNorm = 0.1272, lr_0 = 2.1558e-04
Loss = 7.4890e-03, PNorm = 181.7442, GNorm = 0.2202, lr_0 = 2.1543e-04
Loss = 8.9333e-03, PNorm = 181.7498, GNorm = 0.0710, lr_0 = 2.1528e-04
Loss = 4.8878e-03, PNorm = 181.7570, GNorm = 0.2816, lr_0 = 2.1513e-04
Loss = 3.9345e-03, PNorm = 181.7626, GNorm = 0.1642, lr_0 = 2.1499e-04
Loss = 4.9422e-03, PNorm = 181.7679, GNorm = 0.1057, lr_0 = 2.1484e-04
Loss = 8.3664e-03, PNorm = 181.7732, GNorm = 0.1511, lr_0 = 2.1469e-04
Loss = 3.7857e-03, PNorm = 181.7796, GNorm = 0.1268, lr_0 = 2.1454e-04
Loss = 4.5483e-03, PNorm = 181.7848, GNorm = 0.3631, lr_0 = 2.1440e-04
Loss = 4.1725e-03, PNorm = 181.7887, GNorm = 0.1652, lr_0 = 2.1425e-04
Loss = 1.0294e-02, PNorm = 181.7936, GNorm = 0.4071, lr_0 = 2.1410e-04
Loss = 8.3865e-03, PNorm = 181.7991, GNorm = 0.2031, lr_0 = 2.1396e-04
Loss = 3.9019e-03, PNorm = 181.8058, GNorm = 0.1045, lr_0 = 2.1381e-04
Loss = 7.3884e-03, PNorm = 181.8113, GNorm = 0.1899, lr_0 = 2.1366e-04
Loss = 3.9650e-03, PNorm = 181.8162, GNorm = 0.1134, lr_0 = 2.1352e-04
Loss = 7.1390e-03, PNorm = 181.8217, GNorm = 0.2496, lr_0 = 2.1337e-04
Loss = 4.4232e-03, PNorm = 181.8267, GNorm = 0.0959, lr_0 = 2.1323e-04
Loss = 4.2605e-03, PNorm = 181.8317, GNorm = 0.1157, lr_0 = 2.1308e-04
Loss = 1.1437e-02, PNorm = 181.8397, GNorm = 0.2514, lr_0 = 2.1293e-04
Loss = 8.9714e-03, PNorm = 181.8449, GNorm = 0.1054, lr_0 = 2.1279e-04
Loss = 7.4129e-03, PNorm = 181.8491, GNorm = 0.1459, lr_0 = 2.1264e-04
Loss = 5.9853e-03, PNorm = 181.8577, GNorm = 0.1761, lr_0 = 2.1250e-04
Loss = 5.3281e-03, PNorm = 181.8649, GNorm = 0.2171, lr_0 = 2.1235e-04
Loss = 8.4977e-03, PNorm = 181.8715, GNorm = 0.1531, lr_0 = 2.1221e-04
Loss = 1.9554e-02, PNorm = 181.8766, GNorm = 0.3051, lr_0 = 2.1206e-04
Loss = 6.9924e-03, PNorm = 181.8805, GNorm = 0.1804, lr_0 = 2.1191e-04
Loss = 7.5315e-03, PNorm = 181.8849, GNorm = 0.1305, lr_0 = 2.1177e-04
Loss = 4.2184e-03, PNorm = 181.8926, GNorm = 0.0937, lr_0 = 2.1162e-04
Loss = 1.0469e-02, PNorm = 181.8989, GNorm = 0.2057, lr_0 = 2.1148e-04
Loss = 8.7365e-03, PNorm = 181.9028, GNorm = 0.1734, lr_0 = 2.1133e-04
Loss = 9.5173e-03, PNorm = 181.9057, GNorm = 0.1094, lr_0 = 2.1119e-04
Loss = 4.9806e-03, PNorm = 181.9122, GNorm = 0.0842, lr_0 = 2.1104e-04
Loss = 5.6681e-03, PNorm = 181.9191, GNorm = 0.1812, lr_0 = 2.1090e-04
Loss = 7.9928e-03, PNorm = 181.9262, GNorm = 0.1706, lr_0 = 2.1076e-04
Loss = 5.2380e-03, PNorm = 181.9326, GNorm = 0.1279, lr_0 = 2.1061e-04
Loss = 7.6689e-03, PNorm = 181.9388, GNorm = 0.2595, lr_0 = 2.1047e-04
Loss = 4.4419e-03, PNorm = 181.9442, GNorm = 0.1415, lr_0 = 2.1032e-04
Loss = 7.5791e-03, PNorm = 181.9519, GNorm = 0.1274, lr_0 = 2.1018e-04
Loss = 4.8544e-03, PNorm = 181.9610, GNorm = 0.3387, lr_0 = 2.1003e-04
Loss = 5.1779e-03, PNorm = 181.9682, GNorm = 0.1565, lr_0 = 2.0989e-04
Loss = 7.5432e-03, PNorm = 181.9736, GNorm = 0.1356, lr_0 = 2.0975e-04
Loss = 4.7783e-03, PNorm = 181.9812, GNorm = 0.1514, lr_0 = 2.0960e-04
Validation mae = 0.121291
Epoch 21
Loss = 7.5810e-03, PNorm = 181.9820, GNorm = 0.2439, lr_0 = 2.0946e-04
Loss = 9.1765e-03, PNorm = 181.9850, GNorm = 0.1465, lr_0 = 2.0932e-04
Loss = 6.2138e-03, PNorm = 181.9885, GNorm = 0.1896, lr_0 = 2.0917e-04
Loss = 8.5074e-03, PNorm = 181.9939, GNorm = 0.0891, lr_0 = 2.0903e-04
Loss = 7.1090e-03, PNorm = 181.9981, GNorm = 0.1683, lr_0 = 2.0889e-04
Loss = 4.6971e-03, PNorm = 182.0030, GNorm = 0.2609, lr_0 = 2.0874e-04
Loss = 5.8751e-03, PNorm = 182.0072, GNorm = 0.1496, lr_0 = 2.0860e-04
Loss = 7.3337e-03, PNorm = 182.0108, GNorm = 0.2630, lr_0 = 2.0846e-04
Loss = 4.6839e-03, PNorm = 182.0148, GNorm = 0.2452, lr_0 = 2.0831e-04
Loss = 5.4959e-03, PNorm = 182.0166, GNorm = 0.2795, lr_0 = 2.0817e-04
Loss = 1.1088e-02, PNorm = 182.0230, GNorm = 0.2216, lr_0 = 2.0803e-04
Loss = 4.4346e-03, PNorm = 182.0299, GNorm = 0.2353, lr_0 = 2.0789e-04
Loss = 5.5228e-03, PNorm = 182.0340, GNorm = 0.1088, lr_0 = 2.0774e-04
Loss = 7.3893e-03, PNorm = 182.0384, GNorm = 0.1007, lr_0 = 2.0760e-04
Loss = 3.9417e-03, PNorm = 182.0435, GNorm = 0.0723, lr_0 = 2.0746e-04
Loss = 5.9548e-03, PNorm = 182.0503, GNorm = 0.1306, lr_0 = 2.0732e-04
Loss = 8.1518e-03, PNorm = 182.0532, GNorm = 0.1180, lr_0 = 2.0718e-04
Loss = 5.6310e-03, PNorm = 182.0579, GNorm = 0.1176, lr_0 = 2.0703e-04
Loss = 5.1240e-03, PNorm = 182.0631, GNorm = 0.2348, lr_0 = 2.0689e-04
Loss = 6.8149e-03, PNorm = 182.0664, GNorm = 1.0089, lr_0 = 2.0675e-04
Loss = 5.2667e-03, PNorm = 182.0702, GNorm = 0.1834, lr_0 = 2.0661e-04
Loss = 5.2691e-03, PNorm = 182.0738, GNorm = 0.2368, lr_0 = 2.0647e-04
Loss = 3.7070e-03, PNorm = 182.0785, GNorm = 0.0827, lr_0 = 2.0633e-04
Loss = 4.7458e-03, PNorm = 182.0812, GNorm = 0.6115, lr_0 = 2.0618e-04
Loss = 5.2666e-03, PNorm = 182.0832, GNorm = 0.2594, lr_0 = 2.0604e-04
Loss = 5.7415e-03, PNorm = 182.0860, GNorm = 0.0755, lr_0 = 2.0590e-04
Loss = 3.6156e-03, PNorm = 182.0890, GNorm = 0.3536, lr_0 = 2.0576e-04
Loss = 4.7185e-03, PNorm = 182.0931, GNorm = 0.1069, lr_0 = 2.0562e-04
Loss = 4.9201e-03, PNorm = 182.0977, GNorm = 0.1678, lr_0 = 2.0548e-04
Loss = 4.7845e-03, PNorm = 182.1022, GNorm = 0.1027, lr_0 = 2.0534e-04
Loss = 5.9919e-03, PNorm = 182.1077, GNorm = 0.2155, lr_0 = 2.0520e-04
Loss = 3.4243e-03, PNorm = 182.1090, GNorm = 0.1042, lr_0 = 2.0506e-04
Loss = 3.5177e-03, PNorm = 182.1129, GNorm = 0.2880, lr_0 = 2.0492e-04
Loss = 3.5296e-03, PNorm = 182.1189, GNorm = 0.1498, lr_0 = 2.0478e-04
Loss = 8.0987e-03, PNorm = 182.1235, GNorm = 0.2453, lr_0 = 2.0464e-04
Loss = 7.8112e-03, PNorm = 182.1288, GNorm = 0.2582, lr_0 = 2.0450e-04
Loss = 6.4551e-03, PNorm = 182.1337, GNorm = 0.2082, lr_0 = 2.0436e-04
Loss = 3.6458e-03, PNorm = 182.1386, GNorm = 0.1827, lr_0 = 2.0422e-04
Loss = 1.0517e-02, PNorm = 182.1444, GNorm = 0.5335, lr_0 = 2.0408e-04
Loss = 3.6933e-03, PNorm = 182.1483, GNorm = 0.1317, lr_0 = 2.0394e-04
Loss = 4.0217e-03, PNorm = 182.1526, GNorm = 0.1267, lr_0 = 2.0380e-04
Loss = 5.9247e-03, PNorm = 182.1578, GNorm = 0.1389, lr_0 = 2.0366e-04
Loss = 3.1173e-03, PNorm = 182.1619, GNorm = 0.2509, lr_0 = 2.0352e-04
Loss = 5.5855e-03, PNorm = 182.1660, GNorm = 0.2542, lr_0 = 2.0338e-04
Loss = 6.8358e-03, PNorm = 182.1707, GNorm = 0.1778, lr_0 = 2.0324e-04
Loss = 8.3061e-03, PNorm = 182.1739, GNorm = 0.3266, lr_0 = 2.0310e-04
Loss = 8.9044e-03, PNorm = 182.1780, GNorm = 0.1120, lr_0 = 2.0296e-04
Loss = 3.1469e-03, PNorm = 182.1828, GNorm = 0.1272, lr_0 = 2.0282e-04
Loss = 5.4517e-03, PNorm = 182.1857, GNorm = 0.1662, lr_0 = 2.0268e-04
Loss = 4.2071e-03, PNorm = 182.1909, GNorm = 0.1155, lr_0 = 2.0254e-04
Loss = 6.0218e-03, PNorm = 182.1952, GNorm = 0.1941, lr_0 = 2.0240e-04
Loss = 5.4524e-03, PNorm = 182.1995, GNorm = 0.0970, lr_0 = 2.0227e-04
Loss = 3.1354e-03, PNorm = 182.2050, GNorm = 0.1410, lr_0 = 2.0213e-04
Loss = 3.2676e-03, PNorm = 182.2084, GNorm = 0.0812, lr_0 = 2.0199e-04
Loss = 4.5159e-03, PNorm = 182.2120, GNorm = 0.2658, lr_0 = 2.0185e-04
Loss = 6.2855e-03, PNorm = 182.2145, GNorm = 0.1179, lr_0 = 2.0171e-04
Loss = 6.3063e-03, PNorm = 182.2187, GNorm = 0.0656, lr_0 = 2.0157e-04
Loss = 4.9075e-03, PNorm = 182.2238, GNorm = 0.1649, lr_0 = 2.0144e-04
Loss = 3.3522e-03, PNorm = 182.2262, GNorm = 0.0644, lr_0 = 2.0130e-04
Loss = 3.5148e-03, PNorm = 182.2313, GNorm = 0.0950, lr_0 = 2.0116e-04
Loss = 5.8493e-03, PNorm = 182.2361, GNorm = 0.1387, lr_0 = 2.0102e-04
Loss = 9.8308e-03, PNorm = 182.2405, GNorm = 0.2629, lr_0 = 2.0088e-04
Loss = 5.2611e-03, PNorm = 182.2466, GNorm = 0.3000, lr_0 = 2.0075e-04
Loss = 2.9801e-03, PNorm = 182.2514, GNorm = 0.1925, lr_0 = 2.0061e-04
Loss = 1.2784e-02, PNorm = 182.2562, GNorm = 0.1862, lr_0 = 2.0047e-04
Loss = 5.0764e-03, PNorm = 182.2611, GNorm = 0.3781, lr_0 = 2.0033e-04
Loss = 3.1944e-03, PNorm = 182.2661, GNorm = 0.1265, lr_0 = 2.0020e-04
Loss = 3.3320e-03, PNorm = 182.2718, GNorm = 0.1505, lr_0 = 2.0006e-04
Loss = 4.7202e-03, PNorm = 182.2768, GNorm = 0.0985, lr_0 = 1.9992e-04
Loss = 3.4582e-03, PNorm = 182.2839, GNorm = 0.1578, lr_0 = 1.9979e-04
Loss = 5.7171e-03, PNorm = 182.2910, GNorm = 0.1114, lr_0 = 1.9965e-04
Loss = 6.0633e-03, PNorm = 182.2971, GNorm = 0.2773, lr_0 = 1.9951e-04
Loss = 3.9186e-03, PNorm = 182.3026, GNorm = 0.2996, lr_0 = 1.9938e-04
Loss = 5.9375e-03, PNorm = 182.3065, GNorm = 0.0853, lr_0 = 1.9924e-04
Loss = 4.4234e-03, PNorm = 182.3084, GNorm = 0.1804, lr_0 = 1.9910e-04
Loss = 8.3715e-03, PNorm = 182.3114, GNorm = 0.1655, lr_0 = 1.9897e-04
Loss = 4.4310e-03, PNorm = 182.3149, GNorm = 0.1124, lr_0 = 1.9883e-04
Loss = 5.6599e-03, PNorm = 182.3191, GNorm = 0.1270, lr_0 = 1.9869e-04
Loss = 4.0480e-03, PNorm = 182.3229, GNorm = 0.1354, lr_0 = 1.9856e-04
Loss = 6.5805e-03, PNorm = 182.3265, GNorm = 0.1574, lr_0 = 1.9842e-04
Loss = 1.0414e-02, PNorm = 182.3327, GNorm = 0.3310, lr_0 = 1.9829e-04
Loss = 8.3709e-03, PNorm = 182.3394, GNorm = 0.1377, lr_0 = 1.9815e-04
Loss = 7.7523e-03, PNorm = 182.3443, GNorm = 0.2321, lr_0 = 1.9801e-04
Loss = 4.4121e-03, PNorm = 182.3514, GNorm = 0.0843, lr_0 = 1.9788e-04
Loss = 6.6572e-03, PNorm = 182.3542, GNorm = 0.0881, lr_0 = 1.9774e-04
Loss = 4.0917e-03, PNorm = 182.3591, GNorm = 0.2112, lr_0 = 1.9761e-04
Loss = 3.9752e-03, PNorm = 182.3639, GNorm = 0.1385, lr_0 = 1.9747e-04
Loss = 3.8135e-03, PNorm = 182.3686, GNorm = 0.1734, lr_0 = 1.9734e-04
Loss = 6.3200e-03, PNorm = 182.3750, GNorm = 0.0982, lr_0 = 1.9720e-04
Loss = 5.2363e-03, PNorm = 182.3796, GNorm = 0.1937, lr_0 = 1.9707e-04
Loss = 4.4993e-03, PNorm = 182.3834, GNorm = 0.1521, lr_0 = 1.9693e-04
Loss = 4.2842e-03, PNorm = 182.3882, GNorm = 0.1541, lr_0 = 1.9680e-04
Loss = 8.7882e-03, PNorm = 182.3954, GNorm = 0.3916, lr_0 = 1.9666e-04
Loss = 3.8757e-03, PNorm = 182.3996, GNorm = 0.0552, lr_0 = 1.9653e-04
Loss = 4.3046e-03, PNorm = 182.4030, GNorm = 0.2412, lr_0 = 1.9639e-04
Loss = 3.5614e-03, PNorm = 182.4053, GNorm = 0.1559, lr_0 = 1.9626e-04
Loss = 2.8963e-03, PNorm = 182.4085, GNorm = 0.1619, lr_0 = 1.9612e-04
Loss = 8.5904e-03, PNorm = 182.4113, GNorm = 0.2362, lr_0 = 1.9599e-04
Loss = 6.9748e-03, PNorm = 182.4135, GNorm = 0.2497, lr_0 = 1.9585e-04
Loss = 8.3484e-03, PNorm = 182.4165, GNorm = 0.3978, lr_0 = 1.9572e-04
Loss = 5.1430e-03, PNorm = 182.4199, GNorm = 0.1442, lr_0 = 1.9559e-04
Loss = 1.3340e-02, PNorm = 182.4214, GNorm = 0.1574, lr_0 = 1.9545e-04
Loss = 4.3275e-03, PNorm = 182.4274, GNorm = 0.1946, lr_0 = 1.9532e-04
Loss = 2.0665e-02, PNorm = 182.4355, GNorm = 0.2509, lr_0 = 1.9518e-04
Loss = 5.3455e-03, PNorm = 182.4411, GNorm = 0.2047, lr_0 = 1.9505e-04
Loss = 6.7187e-03, PNorm = 182.4490, GNorm = 0.9237, lr_0 = 1.9492e-04
Loss = 5.4527e-03, PNorm = 182.4548, GNorm = 0.1450, lr_0 = 1.9478e-04
Loss = 7.7327e-03, PNorm = 182.4594, GNorm = 0.2512, lr_0 = 1.9465e-04
Loss = 6.3351e-03, PNorm = 182.4634, GNorm = 0.1065, lr_0 = 1.9452e-04
Loss = 6.2729e-03, PNorm = 182.4688, GNorm = 0.2251, lr_0 = 1.9438e-04
Loss = 4.7526e-03, PNorm = 182.4751, GNorm = 0.0807, lr_0 = 1.9425e-04
Loss = 6.9835e-03, PNorm = 182.4816, GNorm = 0.2327, lr_0 = 1.9412e-04
Loss = 8.4530e-03, PNorm = 182.4855, GNorm = 0.3556, lr_0 = 1.9398e-04
Loss = 5.5607e-03, PNorm = 182.4888, GNorm = 0.1797, lr_0 = 1.9385e-04
Loss = 3.3953e-03, PNorm = 182.4949, GNorm = 0.0859, lr_0 = 1.9372e-04
Loss = 4.5434e-03, PNorm = 182.4983, GNorm = 0.2579, lr_0 = 1.9359e-04
Loss = 4.1193e-03, PNorm = 182.5030, GNorm = 0.1844, lr_0 = 1.9345e-04
Loss = 6.0057e-03, PNorm = 182.5056, GNorm = 0.1843, lr_0 = 1.9332e-04
Loss = 4.2369e-03, PNorm = 182.5090, GNorm = 0.1665, lr_0 = 1.9319e-04
Loss = 4.3921e-03, PNorm = 182.5132, GNorm = 0.1135, lr_0 = 1.9306e-04
Validation mae = 0.121264
Epoch 22
Loss = 5.3597e-03, PNorm = 182.5183, GNorm = 0.6263, lr_0 = 1.9292e-04
Loss = 6.5843e-03, PNorm = 182.5222, GNorm = 0.0790, lr_0 = 1.9279e-04
Loss = 3.0681e-03, PNorm = 182.5246, GNorm = 0.0926, lr_0 = 1.9266e-04
Loss = 4.9670e-03, PNorm = 182.5257, GNorm = 0.5464, lr_0 = 1.9253e-04
Loss = 3.1108e-03, PNorm = 182.5280, GNorm = 0.1215, lr_0 = 1.9240e-04
Loss = 5.9814e-03, PNorm = 182.5311, GNorm = 0.4167, lr_0 = 1.9226e-04
Loss = 4.5679e-03, PNorm = 182.5347, GNorm = 0.2023, lr_0 = 1.9213e-04
Loss = 3.1844e-03, PNorm = 182.5393, GNorm = 0.1022, lr_0 = 1.9200e-04
Loss = 5.1217e-03, PNorm = 182.5421, GNorm = 0.1117, lr_0 = 1.9187e-04
Loss = 3.0334e-03, PNorm = 182.5445, GNorm = 0.1568, lr_0 = 1.9174e-04
Loss = 3.9292e-03, PNorm = 182.5473, GNorm = 0.4490, lr_0 = 1.9161e-04
Loss = 3.3681e-03, PNorm = 182.5501, GNorm = 0.1491, lr_0 = 1.9148e-04
Loss = 3.5036e-03, PNorm = 182.5525, GNorm = 0.1373, lr_0 = 1.9134e-04
Loss = 7.4356e-03, PNorm = 182.5577, GNorm = 0.1246, lr_0 = 1.9121e-04
Loss = 5.4227e-03, PNorm = 182.5622, GNorm = 0.1152, lr_0 = 1.9108e-04
Loss = 4.3778e-03, PNorm = 182.5642, GNorm = 0.1354, lr_0 = 1.9095e-04
Loss = 3.4156e-03, PNorm = 182.5669, GNorm = 0.1645, lr_0 = 1.9082e-04
Loss = 6.9600e-03, PNorm = 182.5700, GNorm = 0.1086, lr_0 = 1.9069e-04
Loss = 3.2175e-03, PNorm = 182.5730, GNorm = 0.1387, lr_0 = 1.9056e-04
Loss = 3.3886e-03, PNorm = 182.5751, GNorm = 0.2016, lr_0 = 1.9043e-04
Loss = 4.2418e-03, PNorm = 182.5777, GNorm = 0.1021, lr_0 = 1.9030e-04
Loss = 4.0778e-03, PNorm = 182.5788, GNorm = 0.1338, lr_0 = 1.9017e-04
Loss = 6.5439e-03, PNorm = 182.5816, GNorm = 0.2262, lr_0 = 1.9004e-04
Loss = 6.2207e-03, PNorm = 182.5852, GNorm = 0.2303, lr_0 = 1.8991e-04
Loss = 5.0220e-03, PNorm = 182.5886, GNorm = 0.1660, lr_0 = 1.8978e-04
Loss = 5.1541e-03, PNorm = 182.5921, GNorm = 0.1258, lr_0 = 1.8965e-04
Loss = 5.8600e-03, PNorm = 182.5945, GNorm = 0.1953, lr_0 = 1.8952e-04
Loss = 4.7918e-03, PNorm = 182.5980, GNorm = 0.2301, lr_0 = 1.8939e-04
Loss = 6.2761e-03, PNorm = 182.6017, GNorm = 0.1508, lr_0 = 1.8926e-04
Loss = 3.0457e-03, PNorm = 182.6070, GNorm = 0.2813, lr_0 = 1.8913e-04
Loss = 4.9337e-03, PNorm = 182.6080, GNorm = 0.2346, lr_0 = 1.8900e-04
Loss = 6.7810e-03, PNorm = 182.6114, GNorm = 0.7638, lr_0 = 1.8887e-04
Loss = 3.9421e-03, PNorm = 182.6147, GNorm = 0.1792, lr_0 = 1.8874e-04
Loss = 4.8223e-03, PNorm = 182.6156, GNorm = 0.3614, lr_0 = 1.8861e-04
Loss = 4.5517e-03, PNorm = 182.6183, GNorm = 0.0599, lr_0 = 1.8848e-04
Loss = 5.0120e-03, PNorm = 182.6227, GNorm = 0.1502, lr_0 = 1.8835e-04
Loss = 3.1137e-03, PNorm = 182.6291, GNorm = 0.1728, lr_0 = 1.8822e-04
Loss = 2.4616e-03, PNorm = 182.6338, GNorm = 0.1322, lr_0 = 1.8809e-04
Loss = 9.1958e-03, PNorm = 182.6376, GNorm = 0.2312, lr_0 = 1.8797e-04
Loss = 4.0095e-03, PNorm = 182.6418, GNorm = 0.2313, lr_0 = 1.8784e-04
Loss = 2.9490e-03, PNorm = 182.6450, GNorm = 0.0934, lr_0 = 1.8771e-04
Loss = 5.4981e-03, PNorm = 182.6505, GNorm = 0.1303, lr_0 = 1.8758e-04
Loss = 5.5008e-03, PNorm = 182.6549, GNorm = 0.1189, lr_0 = 1.8745e-04
Loss = 3.1075e-03, PNorm = 182.6594, GNorm = 0.1004, lr_0 = 1.8732e-04
Loss = 4.3730e-03, PNorm = 182.6631, GNorm = 0.1306, lr_0 = 1.8719e-04
Loss = 6.5303e-03, PNorm = 182.6671, GNorm = 0.5547, lr_0 = 1.8707e-04
Loss = 5.0615e-03, PNorm = 182.6709, GNorm = 0.1012, lr_0 = 1.8694e-04
Loss = 4.3819e-03, PNorm = 182.6746, GNorm = 0.1400, lr_0 = 1.8681e-04
Loss = 3.4411e-03, PNorm = 182.6786, GNorm = 0.3771, lr_0 = 1.8668e-04
Loss = 6.5533e-03, PNorm = 182.6816, GNorm = 0.0839, lr_0 = 1.8655e-04
Loss = 6.8628e-03, PNorm = 182.6862, GNorm = 0.2156, lr_0 = 1.8643e-04
Loss = 6.4538e-03, PNorm = 182.6914, GNorm = 0.1047, lr_0 = 1.8630e-04
Loss = 1.2689e-02, PNorm = 182.6959, GNorm = 0.2139, lr_0 = 1.8617e-04
Loss = 5.6321e-03, PNorm = 182.6997, GNorm = 0.3287, lr_0 = 1.8604e-04
Loss = 1.7326e-02, PNorm = 182.7005, GNorm = 0.1705, lr_0 = 1.8592e-04
Loss = 8.8436e-03, PNorm = 182.7049, GNorm = 0.1658, lr_0 = 1.8579e-04
Loss = 3.3388e-03, PNorm = 182.7087, GNorm = 0.1773, lr_0 = 1.8566e-04
Loss = 7.0281e-03, PNorm = 182.7125, GNorm = 0.2433, lr_0 = 1.8553e-04
Loss = 1.0502e-02, PNorm = 182.7167, GNorm = 0.1589, lr_0 = 1.8541e-04
Loss = 3.1861e-03, PNorm = 182.7203, GNorm = 0.2091, lr_0 = 1.8528e-04
Loss = 5.3744e-03, PNorm = 182.7241, GNorm = 0.0961, lr_0 = 1.8515e-04
Loss = 1.2843e-02, PNorm = 182.7286, GNorm = 0.2404, lr_0 = 1.8503e-04
Loss = 3.0014e-03, PNorm = 182.7315, GNorm = 0.0690, lr_0 = 1.8490e-04
Loss = 3.0456e-03, PNorm = 182.7362, GNorm = 0.1352, lr_0 = 1.8477e-04
Loss = 5.1210e-03, PNorm = 182.7396, GNorm = 0.1202, lr_0 = 1.8465e-04
Loss = 7.7280e-03, PNorm = 182.7426, GNorm = 0.1038, lr_0 = 1.8452e-04
Loss = 4.9908e-03, PNorm = 182.7447, GNorm = 0.2498, lr_0 = 1.8439e-04
Loss = 2.9002e-03, PNorm = 182.7477, GNorm = 0.2535, lr_0 = 1.8427e-04
Loss = 2.7105e-03, PNorm = 182.7513, GNorm = 0.1242, lr_0 = 1.8414e-04
Loss = 3.1226e-03, PNorm = 182.7556, GNorm = 0.1015, lr_0 = 1.8401e-04
Loss = 3.5100e-03, PNorm = 182.7583, GNorm = 0.1468, lr_0 = 1.8389e-04
Loss = 4.6300e-03, PNorm = 182.7615, GNorm = 0.1150, lr_0 = 1.8376e-04
Loss = 3.9188e-03, PNorm = 182.7658, GNorm = 0.2214, lr_0 = 1.8364e-04
Loss = 3.6895e-03, PNorm = 182.7709, GNorm = 0.0992, lr_0 = 1.8351e-04
Loss = 4.1280e-03, PNorm = 182.7763, GNorm = 0.1248, lr_0 = 1.8338e-04
Loss = 9.5631e-03, PNorm = 182.7800, GNorm = 0.3960, lr_0 = 1.8326e-04
Loss = 5.7870e-03, PNorm = 182.7835, GNorm = 0.3884, lr_0 = 1.8313e-04
Loss = 5.6271e-03, PNorm = 182.7855, GNorm = 0.2598, lr_0 = 1.8301e-04
Loss = 2.6866e-03, PNorm = 182.7874, GNorm = 0.1140, lr_0 = 1.8288e-04
Loss = 3.6430e-03, PNorm = 182.7899, GNorm = 0.0804, lr_0 = 1.8276e-04
Loss = 7.0461e-03, PNorm = 182.7955, GNorm = 0.0993, lr_0 = 1.8263e-04
Loss = 3.4242e-03, PNorm = 182.8005, GNorm = 0.1251, lr_0 = 1.8251e-04
Loss = 4.6213e-03, PNorm = 182.8040, GNorm = 0.3499, lr_0 = 1.8238e-04
Loss = 4.5169e-03, PNorm = 182.8096, GNorm = 0.1235, lr_0 = 1.8226e-04
Loss = 3.1759e-03, PNorm = 182.8128, GNorm = 0.2840, lr_0 = 1.8213e-04
Loss = 1.1245e-02, PNorm = 182.8165, GNorm = 0.1848, lr_0 = 1.8201e-04
Loss = 4.1292e-03, PNorm = 182.8197, GNorm = 0.1707, lr_0 = 1.8188e-04
Loss = 8.0270e-03, PNorm = 182.8235, GNorm = 0.0813, lr_0 = 1.8176e-04
Loss = 4.0209e-03, PNorm = 182.8265, GNorm = 0.1561, lr_0 = 1.8163e-04
Loss = 2.9024e-03, PNorm = 182.8313, GNorm = 0.1014, lr_0 = 1.8151e-04
Loss = 3.9996e-03, PNorm = 182.8352, GNorm = 0.1275, lr_0 = 1.8138e-04
Loss = 6.4824e-03, PNorm = 182.8391, GNorm = 0.2067, lr_0 = 1.8126e-04
Loss = 8.3263e-03, PNorm = 182.8418, GNorm = 0.2441, lr_0 = 1.8114e-04
Loss = 5.2346e-03, PNorm = 182.8463, GNorm = 0.0957, lr_0 = 1.8101e-04
Loss = 3.0756e-03, PNorm = 182.8520, GNorm = 0.0746, lr_0 = 1.8089e-04
Loss = 8.4836e-03, PNorm = 182.8565, GNorm = 0.0644, lr_0 = 1.8076e-04
Loss = 2.8413e-03, PNorm = 182.8607, GNorm = 0.2752, lr_0 = 1.8064e-04
Loss = 2.5325e-03, PNorm = 182.8650, GNorm = 0.1578, lr_0 = 1.8052e-04
Loss = 5.9585e-03, PNorm = 182.8686, GNorm = 0.2417, lr_0 = 1.8039e-04
Loss = 2.5792e-03, PNorm = 182.8721, GNorm = 0.1476, lr_0 = 1.8027e-04
Loss = 2.8710e-03, PNorm = 182.8752, GNorm = 0.0671, lr_0 = 1.8015e-04
Loss = 6.9698e-03, PNorm = 182.8793, GNorm = 0.0795, lr_0 = 1.8002e-04
Loss = 4.3106e-03, PNorm = 182.8844, GNorm = 0.1049, lr_0 = 1.7990e-04
Loss = 4.1909e-03, PNorm = 182.8883, GNorm = 0.0984, lr_0 = 1.7978e-04
Loss = 6.5596e-03, PNorm = 182.8929, GNorm = 0.1029, lr_0 = 1.7965e-04
Loss = 4.3770e-03, PNorm = 182.8967, GNorm = 0.2418, lr_0 = 1.7953e-04
Loss = 3.4862e-03, PNorm = 182.8986, GNorm = 0.2438, lr_0 = 1.7941e-04
Loss = 4.8017e-03, PNorm = 182.9016, GNorm = 0.0833, lr_0 = 1.7928e-04
Loss = 2.5703e-03, PNorm = 182.9056, GNorm = 0.1698, lr_0 = 1.7916e-04
Loss = 2.8177e-03, PNorm = 182.9080, GNorm = 0.0781, lr_0 = 1.7904e-04
Loss = 4.4073e-03, PNorm = 182.9117, GNorm = 0.1776, lr_0 = 1.7892e-04
Loss = 4.5645e-03, PNorm = 182.9153, GNorm = 0.1920, lr_0 = 1.7879e-04
Loss = 5.6851e-03, PNorm = 182.9179, GNorm = 0.1878, lr_0 = 1.7867e-04
Loss = 3.9623e-03, PNorm = 182.9221, GNorm = 0.0898, lr_0 = 1.7855e-04
Loss = 5.3655e-03, PNorm = 182.9268, GNorm = 0.6580, lr_0 = 1.7843e-04
Loss = 7.5589e-03, PNorm = 182.9300, GNorm = 0.0947, lr_0 = 1.7830e-04
Loss = 4.9778e-03, PNorm = 182.9357, GNorm = 0.2149, lr_0 = 1.7818e-04
Loss = 2.7880e-03, PNorm = 182.9410, GNorm = 0.1346, lr_0 = 1.7806e-04
Loss = 3.1296e-03, PNorm = 182.9465, GNorm = 0.1061, lr_0 = 1.7794e-04
Loss = 7.6549e-03, PNorm = 182.9505, GNorm = 0.1134, lr_0 = 1.7782e-04
Validation mae = 0.121167
Epoch 23
Loss = 5.6610e-03, PNorm = 182.9537, GNorm = 0.0769, lr_0 = 1.7769e-04
Loss = 1.0014e-02, PNorm = 182.9558, GNorm = 0.1391, lr_0 = 1.7757e-04
Loss = 3.8401e-03, PNorm = 182.9581, GNorm = 0.1158, lr_0 = 1.7745e-04
Loss = 3.4839e-03, PNorm = 182.9619, GNorm = 0.1519, lr_0 = 1.7733e-04
Loss = 5.3403e-03, PNorm = 182.9643, GNorm = 0.0771, lr_0 = 1.7721e-04
Loss = 2.8120e-03, PNorm = 182.9674, GNorm = 0.1130, lr_0 = 1.7709e-04
Loss = 4.7280e-03, PNorm = 182.9695, GNorm = 0.1675, lr_0 = 1.7696e-04
Loss = 3.5605e-03, PNorm = 182.9725, GNorm = 0.0717, lr_0 = 1.7684e-04
Loss = 2.7047e-03, PNorm = 182.9764, GNorm = 0.0848, lr_0 = 1.7672e-04
Loss = 4.3389e-03, PNorm = 182.9777, GNorm = 0.0988, lr_0 = 1.7660e-04
Loss = 7.0310e-03, PNorm = 182.9813, GNorm = 0.1762, lr_0 = 1.7648e-04
Loss = 5.0780e-03, PNorm = 182.9849, GNorm = 0.0896, lr_0 = 1.7636e-04
Loss = 2.7174e-03, PNorm = 182.9878, GNorm = 0.1322, lr_0 = 1.7624e-04
Loss = 2.9615e-03, PNorm = 182.9894, GNorm = 0.1407, lr_0 = 1.7612e-04
Loss = 2.2846e-03, PNorm = 182.9898, GNorm = 0.0697, lr_0 = 1.7600e-04
Loss = 7.8262e-03, PNorm = 182.9924, GNorm = 0.1410, lr_0 = 1.7588e-04
Loss = 6.3866e-03, PNorm = 182.9955, GNorm = 0.1284, lr_0 = 1.7576e-04
Loss = 2.9757e-03, PNorm = 182.9989, GNorm = 0.0899, lr_0 = 1.7564e-04
Loss = 2.5848e-03, PNorm = 183.0028, GNorm = 0.0856, lr_0 = 1.7552e-04
Loss = 2.7488e-03, PNorm = 183.0056, GNorm = 0.1767, lr_0 = 1.7540e-04
Loss = 5.7059e-03, PNorm = 183.0103, GNorm = 0.1623, lr_0 = 1.7528e-04
Loss = 5.3651e-03, PNorm = 183.0134, GNorm = 0.1703, lr_0 = 1.7516e-04
Loss = 2.9858e-03, PNorm = 183.0180, GNorm = 0.0582, lr_0 = 1.7504e-04
Loss = 5.4755e-03, PNorm = 183.0202, GNorm = 0.1249, lr_0 = 1.7492e-04
Loss = 3.8056e-03, PNorm = 183.0219, GNorm = 0.1053, lr_0 = 1.7480e-04
Loss = 2.4611e-03, PNorm = 183.0235, GNorm = 0.0633, lr_0 = 1.7468e-04
Loss = 3.2597e-03, PNorm = 183.0253, GNorm = 0.0993, lr_0 = 1.7456e-04
Loss = 2.9239e-03, PNorm = 183.0264, GNorm = 0.0568, lr_0 = 1.7444e-04
Loss = 3.1832e-03, PNorm = 183.0289, GNorm = 0.1197, lr_0 = 1.7432e-04
Loss = 2.5418e-03, PNorm = 183.0312, GNorm = 0.0728, lr_0 = 1.7420e-04
Loss = 7.4086e-03, PNorm = 183.0344, GNorm = 0.0995, lr_0 = 1.7408e-04
Loss = 4.1964e-03, PNorm = 183.0369, GNorm = 0.1255, lr_0 = 1.7396e-04
Loss = 4.2708e-03, PNorm = 183.0406, GNorm = 0.1042, lr_0 = 1.7384e-04
Loss = 4.4060e-03, PNorm = 183.0437, GNorm = 0.1894, lr_0 = 1.7372e-04
Loss = 3.3803e-03, PNorm = 183.0465, GNorm = 0.1402, lr_0 = 1.7360e-04
Loss = 4.9996e-03, PNorm = 183.0498, GNorm = 0.1018, lr_0 = 1.7348e-04
Loss = 5.8017e-03, PNorm = 183.0525, GNorm = 0.2340, lr_0 = 1.7336e-04
Loss = 3.0140e-03, PNorm = 183.0578, GNorm = 0.1294, lr_0 = 1.7325e-04
Loss = 2.6982e-03, PNorm = 183.0620, GNorm = 0.1534, lr_0 = 1.7313e-04
Loss = 4.1227e-03, PNorm = 183.0650, GNorm = 0.0545, lr_0 = 1.7301e-04
Loss = 4.7937e-03, PNorm = 183.0670, GNorm = 0.1121, lr_0 = 1.7289e-04
Loss = 9.1014e-03, PNorm = 183.0688, GNorm = 0.7049, lr_0 = 1.7277e-04
Loss = 2.3118e-03, PNorm = 183.0714, GNorm = 0.1689, lr_0 = 1.7265e-04
Loss = 4.2260e-03, PNorm = 183.0745, GNorm = 0.1354, lr_0 = 1.7253e-04
Loss = 5.5708e-03, PNorm = 183.0761, GNorm = 0.0841, lr_0 = 1.7242e-04
Loss = 2.8339e-03, PNorm = 183.0795, GNorm = 0.0649, lr_0 = 1.7230e-04
Loss = 5.1833e-03, PNorm = 183.0822, GNorm = 0.0779, lr_0 = 1.7218e-04
Loss = 4.4447e-03, PNorm = 183.0858, GNorm = 0.2304, lr_0 = 1.7206e-04
Loss = 4.4459e-03, PNorm = 183.0894, GNorm = 0.0708, lr_0 = 1.7194e-04
Loss = 4.6632e-03, PNorm = 183.0919, GNorm = 0.1988, lr_0 = 1.7183e-04
Loss = 5.8932e-03, PNorm = 183.0958, GNorm = 0.2153, lr_0 = 1.7171e-04
Loss = 2.5484e-03, PNorm = 183.0998, GNorm = 0.0932, lr_0 = 1.7159e-04
Loss = 2.6918e-03, PNorm = 183.1033, GNorm = 0.0959, lr_0 = 1.7147e-04
Loss = 3.7924e-03, PNorm = 183.1065, GNorm = 0.1607, lr_0 = 1.7136e-04
Loss = 5.1741e-03, PNorm = 183.1071, GNorm = 0.1019, lr_0 = 1.7124e-04
Loss = 3.1554e-03, PNorm = 183.1086, GNorm = 0.1429, lr_0 = 1.7112e-04
Loss = 4.6070e-03, PNorm = 183.1119, GNorm = 0.1739, lr_0 = 1.7100e-04
Loss = 5.5428e-03, PNorm = 183.1142, GNorm = 0.1911, lr_0 = 1.7089e-04
Loss = 4.1809e-03, PNorm = 183.1167, GNorm = 0.1069, lr_0 = 1.7077e-04
Loss = 2.8867e-03, PNorm = 183.1194, GNorm = 0.1126, lr_0 = 1.7065e-04
Loss = 2.7363e-03, PNorm = 183.1220, GNorm = 0.1923, lr_0 = 1.7054e-04
Loss = 5.6159e-03, PNorm = 183.1236, GNorm = 0.2010, lr_0 = 1.7042e-04
Loss = 2.4219e-03, PNorm = 183.1277, GNorm = 0.0785, lr_0 = 1.7030e-04
Loss = 2.8078e-03, PNorm = 183.1321, GNorm = 0.1672, lr_0 = 1.7019e-04
Loss = 3.6626e-03, PNorm = 183.1367, GNorm = 0.1462, lr_0 = 1.7007e-04
Loss = 9.3047e-03, PNorm = 183.1428, GNorm = 0.1095, lr_0 = 1.6995e-04
Loss = 3.0872e-03, PNorm = 183.1468, GNorm = 0.1558, lr_0 = 1.6984e-04
Loss = 3.7930e-03, PNorm = 183.1496, GNorm = 0.0741, lr_0 = 1.6972e-04
Loss = 2.9663e-03, PNorm = 183.1528, GNorm = 0.1677, lr_0 = 1.6960e-04
Loss = 1.3189e-02, PNorm = 183.1559, GNorm = 0.4599, lr_0 = 1.6949e-04
Loss = 2.8143e-03, PNorm = 183.1574, GNorm = 0.1487, lr_0 = 1.6937e-04
Loss = 3.7240e-03, PNorm = 183.1607, GNorm = 0.2458, lr_0 = 1.6926e-04
Loss = 6.9769e-03, PNorm = 183.1637, GNorm = 0.1780, lr_0 = 1.6914e-04
Loss = 5.8349e-03, PNorm = 183.1686, GNorm = 0.1055, lr_0 = 1.6902e-04
Loss = 5.0713e-03, PNorm = 183.1746, GNorm = 0.1805, lr_0 = 1.6891e-04
Loss = 4.9072e-03, PNorm = 183.1797, GNorm = 0.2620, lr_0 = 1.6879e-04
Loss = 1.9289e-03, PNorm = 183.1819, GNorm = 0.0738, lr_0 = 1.6868e-04
Loss = 5.4816e-03, PNorm = 183.1829, GNorm = 0.1212, lr_0 = 1.6856e-04
Loss = 9.3844e-03, PNorm = 183.1846, GNorm = 0.1154, lr_0 = 1.6845e-04
Loss = 2.1441e-03, PNorm = 183.1854, GNorm = 0.0789, lr_0 = 1.6833e-04
Loss = 3.5136e-03, PNorm = 183.1879, GNorm = 0.0697, lr_0 = 1.6821e-04
Loss = 5.3147e-03, PNorm = 183.1914, GNorm = 0.1320, lr_0 = 1.6810e-04
Loss = 9.9207e-03, PNorm = 183.1933, GNorm = 0.1555, lr_0 = 1.6798e-04
Loss = 6.2061e-03, PNorm = 183.1968, GNorm = 0.4056, lr_0 = 1.6787e-04
Loss = 3.8006e-03, PNorm = 183.2021, GNorm = 0.1536, lr_0 = 1.6775e-04
Loss = 9.5236e-03, PNorm = 183.2066, GNorm = 0.7843, lr_0 = 1.6764e-04
Loss = 5.4504e-03, PNorm = 183.2097, GNorm = 0.1543, lr_0 = 1.6752e-04
Loss = 5.9514e-03, PNorm = 183.2123, GNorm = 0.1225, lr_0 = 1.6741e-04
Loss = 2.5391e-03, PNorm = 183.2144, GNorm = 0.1030, lr_0 = 1.6729e-04
Loss = 5.3011e-03, PNorm = 183.2176, GNorm = 0.1297, lr_0 = 1.6718e-04
Loss = 4.3164e-03, PNorm = 183.2202, GNorm = 0.0637, lr_0 = 1.6707e-04
Loss = 3.8630e-03, PNorm = 183.2236, GNorm = 0.1826, lr_0 = 1.6695e-04
Loss = 4.0752e-03, PNorm = 183.2276, GNorm = 0.1267, lr_0 = 1.6684e-04
Loss = 6.2408e-03, PNorm = 183.2306, GNorm = 0.2422, lr_0 = 1.6672e-04
Loss = 3.2951e-03, PNorm = 183.2344, GNorm = 0.1555, lr_0 = 1.6661e-04
Loss = 2.7369e-03, PNorm = 183.2385, GNorm = 0.1074, lr_0 = 1.6649e-04
Loss = 5.7390e-03, PNorm = 183.2424, GNorm = 0.2407, lr_0 = 1.6638e-04
Loss = 2.9188e-03, PNorm = 183.2445, GNorm = 0.1537, lr_0 = 1.6627e-04
Loss = 5.6452e-03, PNorm = 183.2480, GNorm = 0.1613, lr_0 = 1.6615e-04
Loss = 3.3289e-03, PNorm = 183.2510, GNorm = 0.1102, lr_0 = 1.6604e-04
Loss = 4.4383e-03, PNorm = 183.2529, GNorm = 0.1589, lr_0 = 1.6592e-04
Loss = 6.2307e-03, PNorm = 183.2563, GNorm = 0.4982, lr_0 = 1.6581e-04
Loss = 3.1274e-03, PNorm = 183.2603, GNorm = 0.0945, lr_0 = 1.6570e-04
Loss = 2.6308e-03, PNorm = 183.2633, GNorm = 0.0703, lr_0 = 1.6558e-04
Loss = 7.8655e-03, PNorm = 183.2656, GNorm = 0.1646, lr_0 = 1.6547e-04
Loss = 2.9685e-03, PNorm = 183.2687, GNorm = 0.2289, lr_0 = 1.6536e-04
Loss = 3.8431e-03, PNorm = 183.2735, GNorm = 0.0782, lr_0 = 1.6524e-04
Loss = 2.8591e-03, PNorm = 183.2776, GNorm = 0.0661, lr_0 = 1.6513e-04
Loss = 3.6311e-03, PNorm = 183.2809, GNorm = 0.1349, lr_0 = 1.6502e-04
Loss = 6.3994e-03, PNorm = 183.2855, GNorm = 0.2155, lr_0 = 1.6490e-04
Loss = 6.8198e-03, PNorm = 183.2888, GNorm = 0.1318, lr_0 = 1.6479e-04
Loss = 2.4133e-03, PNorm = 183.2936, GNorm = 0.0716, lr_0 = 1.6468e-04
Loss = 6.4310e-03, PNorm = 183.2983, GNorm = 0.1578, lr_0 = 1.6457e-04
Loss = 3.4696e-03, PNorm = 183.3013, GNorm = 0.2781, lr_0 = 1.6445e-04
Loss = 3.3208e-03, PNorm = 183.3053, GNorm = 0.1358, lr_0 = 1.6434e-04
Loss = 8.6394e-03, PNorm = 183.3108, GNorm = 0.1396, lr_0 = 1.6423e-04
Loss = 8.2444e-03, PNorm = 183.3140, GNorm = 0.1219, lr_0 = 1.6412e-04
Loss = 4.5965e-03, PNorm = 183.3179, GNorm = 0.1661, lr_0 = 1.6400e-04
Loss = 5.6338e-03, PNorm = 183.3225, GNorm = 0.0559, lr_0 = 1.6389e-04
Loss = 2.6633e-03, PNorm = 183.3260, GNorm = 0.0816, lr_0 = 1.6378e-04
Validation mae = 0.121186
Epoch 24
Loss = 2.7103e-03, PNorm = 183.3315, GNorm = 0.1376, lr_0 = 1.6367e-04
Loss = 4.1087e-03, PNorm = 183.3346, GNorm = 0.1022, lr_0 = 1.6355e-04
Loss = 7.0303e-03, PNorm = 183.3379, GNorm = 0.0743, lr_0 = 1.6344e-04
Loss = 2.5242e-03, PNorm = 183.3405, GNorm = 0.1861, lr_0 = 1.6333e-04
Loss = 2.8737e-03, PNorm = 183.3414, GNorm = 0.1810, lr_0 = 1.6322e-04
Loss = 6.8608e-03, PNorm = 183.3430, GNorm = 0.2113, lr_0 = 1.6311e-04
Loss = 2.3944e-03, PNorm = 183.3460, GNorm = 0.0866, lr_0 = 1.6299e-04
Loss = 2.7081e-03, PNorm = 183.3488, GNorm = 0.0757, lr_0 = 1.6288e-04
Loss = 8.2729e-03, PNorm = 183.3514, GNorm = 0.8821, lr_0 = 1.6277e-04
Loss = 5.4169e-03, PNorm = 183.3519, GNorm = 0.0775, lr_0 = 1.6266e-04
Loss = 7.6987e-03, PNorm = 183.3542, GNorm = 0.1420, lr_0 = 1.6255e-04
Loss = 2.3747e-03, PNorm = 183.3576, GNorm = 0.0541, lr_0 = 1.6244e-04
Loss = 2.7234e-03, PNorm = 183.3590, GNorm = 0.0626, lr_0 = 1.6233e-04
Loss = 3.3751e-03, PNorm = 183.3619, GNorm = 0.1011, lr_0 = 1.6221e-04
Loss = 3.6090e-03, PNorm = 183.3662, GNorm = 0.2721, lr_0 = 1.6210e-04
Loss = 5.1872e-03, PNorm = 183.3687, GNorm = 0.1681, lr_0 = 1.6199e-04
Loss = 2.1156e-03, PNorm = 183.3716, GNorm = 0.1432, lr_0 = 1.6188e-04
Loss = 2.3336e-03, PNorm = 183.3747, GNorm = 0.2601, lr_0 = 1.6177e-04
Loss = 5.6719e-03, PNorm = 183.3770, GNorm = 0.1490, lr_0 = 1.6166e-04
Loss = 6.1087e-03, PNorm = 183.3785, GNorm = 0.1878, lr_0 = 1.6155e-04
Loss = 4.6665e-03, PNorm = 183.3815, GNorm = 0.1437, lr_0 = 1.6144e-04
Loss = 2.5240e-03, PNorm = 183.3847, GNorm = 0.0873, lr_0 = 1.6133e-04
Loss = 4.7482e-03, PNorm = 183.3866, GNorm = 0.2571, lr_0 = 1.6122e-04
Loss = 3.8898e-03, PNorm = 183.3885, GNorm = 0.0709, lr_0 = 1.6111e-04
Loss = 6.7818e-03, PNorm = 183.3922, GNorm = 0.1156, lr_0 = 1.6100e-04
Loss = 2.5675e-03, PNorm = 183.3939, GNorm = 0.0954, lr_0 = 1.6089e-04
Loss = 2.8224e-03, PNorm = 183.3951, GNorm = 0.0809, lr_0 = 1.6078e-04
Loss = 3.3268e-03, PNorm = 183.3983, GNorm = 0.2188, lr_0 = 1.6067e-04
Loss = 2.7840e-03, PNorm = 183.4004, GNorm = 0.1252, lr_0 = 1.6056e-04
Loss = 4.7369e-03, PNorm = 183.4026, GNorm = 0.2487, lr_0 = 1.6045e-04
Loss = 3.8852e-03, PNorm = 183.4060, GNorm = 0.1223, lr_0 = 1.6034e-04
Loss = 3.1058e-03, PNorm = 183.4083, GNorm = 0.0633, lr_0 = 1.6023e-04
Loss = 5.1635e-03, PNorm = 183.4105, GNorm = 0.0582, lr_0 = 1.6012e-04
Loss = 2.7466e-03, PNorm = 183.4123, GNorm = 0.0623, lr_0 = 1.6001e-04
Loss = 1.9908e-03, PNorm = 183.4148, GNorm = 0.2768, lr_0 = 1.5990e-04
Loss = 5.7123e-03, PNorm = 183.4165, GNorm = 0.1116, lr_0 = 1.5979e-04
Loss = 2.6449e-03, PNorm = 183.4202, GNorm = 0.1041, lr_0 = 1.5968e-04
Loss = 4.3210e-03, PNorm = 183.4233, GNorm = 0.1182, lr_0 = 1.5957e-04
Loss = 7.1874e-03, PNorm = 183.4271, GNorm = 0.0871, lr_0 = 1.5946e-04
Loss = 2.5187e-03, PNorm = 183.4302, GNorm = 0.0848, lr_0 = 1.5935e-04
Loss = 3.5894e-03, PNorm = 183.4320, GNorm = 0.2331, lr_0 = 1.5924e-04
Loss = 2.7768e-03, PNorm = 183.4348, GNorm = 0.1047, lr_0 = 1.5913e-04
Loss = 3.2210e-03, PNorm = 183.4371, GNorm = 0.0697, lr_0 = 1.5902e-04
Loss = 7.5417e-03, PNorm = 183.4387, GNorm = 0.4847, lr_0 = 1.5891e-04
Loss = 4.8173e-03, PNorm = 183.4395, GNorm = 0.1025, lr_0 = 1.5880e-04
Loss = 2.3713e-03, PNorm = 183.4413, GNorm = 0.1260, lr_0 = 1.5870e-04
Loss = 2.7763e-03, PNorm = 183.4430, GNorm = 0.1033, lr_0 = 1.5859e-04
Loss = 1.1212e-02, PNorm = 183.4462, GNorm = 0.1021, lr_0 = 1.5848e-04
Loss = 2.9344e-03, PNorm = 183.4496, GNorm = 0.1469, lr_0 = 1.5837e-04
Loss = 2.3728e-03, PNorm = 183.4534, GNorm = 0.1245, lr_0 = 1.5826e-04
Loss = 2.1425e-03, PNorm = 183.4568, GNorm = 0.0957, lr_0 = 1.5815e-04
Loss = 6.4656e-03, PNorm = 183.4603, GNorm = 0.1100, lr_0 = 1.5804e-04
Loss = 2.5176e-03, PNorm = 183.4633, GNorm = 0.1214, lr_0 = 1.5794e-04
Loss = 1.5603e-03, PNorm = 183.4661, GNorm = 0.0766, lr_0 = 1.5783e-04
Loss = 3.3934e-03, PNorm = 183.4691, GNorm = 0.1031, lr_0 = 1.5772e-04
Loss = 1.7461e-02, PNorm = 183.4734, GNorm = 0.2318, lr_0 = 1.5761e-04
Loss = 5.0456e-03, PNorm = 183.4734, GNorm = 0.1649, lr_0 = 1.5750e-04
Loss = 3.1418e-03, PNorm = 183.4743, GNorm = 0.0766, lr_0 = 1.5740e-04
Loss = 5.7652e-03, PNorm = 183.4764, GNorm = 0.0849, lr_0 = 1.5729e-04
Loss = 2.6656e-03, PNorm = 183.4804, GNorm = 0.0470, lr_0 = 1.5718e-04
Loss = 2.7742e-03, PNorm = 183.4825, GNorm = 0.4452, lr_0 = 1.5707e-04
Loss = 7.2580e-03, PNorm = 183.4866, GNorm = 0.1188, lr_0 = 1.5697e-04
Loss = 2.3135e-03, PNorm = 183.4903, GNorm = 0.0941, lr_0 = 1.5686e-04
Loss = 5.6772e-03, PNorm = 183.4926, GNorm = 0.0980, lr_0 = 1.5675e-04
Loss = 2.7691e-03, PNorm = 183.4955, GNorm = 0.1170, lr_0 = 1.5664e-04
Loss = 5.0067e-03, PNorm = 183.4979, GNorm = 0.1153, lr_0 = 1.5654e-04
Loss = 5.4067e-03, PNorm = 183.5030, GNorm = 0.1390, lr_0 = 1.5643e-04
Loss = 2.8787e-03, PNorm = 183.5061, GNorm = 0.0926, lr_0 = 1.5632e-04
Loss = 6.8558e-03, PNorm = 183.5085, GNorm = 0.1334, lr_0 = 1.5621e-04
Loss = 4.1335e-03, PNorm = 183.5113, GNorm = 0.1200, lr_0 = 1.5611e-04
Loss = 2.9023e-03, PNorm = 183.5141, GNorm = 0.0839, lr_0 = 1.5600e-04
Loss = 4.0221e-03, PNorm = 183.5156, GNorm = 0.0546, lr_0 = 1.5589e-04
Loss = 4.0325e-03, PNorm = 183.5180, GNorm = 0.0695, lr_0 = 1.5579e-04
Loss = 3.2419e-03, PNorm = 183.5217, GNorm = 0.1491, lr_0 = 1.5568e-04
Loss = 4.6050e-03, PNorm = 183.5247, GNorm = 0.1756, lr_0 = 1.5557e-04
Loss = 5.1050e-03, PNorm = 183.5276, GNorm = 0.2170, lr_0 = 1.5547e-04
Loss = 4.2907e-03, PNorm = 183.5314, GNorm = 0.1400, lr_0 = 1.5536e-04
Loss = 1.1070e-02, PNorm = 183.5359, GNorm = 0.1479, lr_0 = 1.5525e-04
Loss = 1.1719e-02, PNorm = 183.5410, GNorm = 0.3348, lr_0 = 1.5515e-04
Loss = 1.8964e-03, PNorm = 183.5459, GNorm = 0.1926, lr_0 = 1.5504e-04
Loss = 2.3742e-03, PNorm = 183.5487, GNorm = 0.1344, lr_0 = 1.5493e-04
Loss = 2.3632e-03, PNorm = 183.5519, GNorm = 0.1461, lr_0 = 1.5483e-04
Loss = 2.8354e-03, PNorm = 183.5565, GNorm = 0.1470, lr_0 = 1.5472e-04
Loss = 3.1109e-03, PNorm = 183.5600, GNorm = 0.1669, lr_0 = 1.5462e-04
Loss = 3.6363e-03, PNorm = 183.5628, GNorm = 0.1535, lr_0 = 1.5451e-04
Loss = 4.2144e-03, PNorm = 183.5653, GNorm = 0.0851, lr_0 = 1.5440e-04
Loss = 3.6586e-03, PNorm = 183.5664, GNorm = 0.1478, lr_0 = 1.5430e-04
Loss = 2.7256e-03, PNorm = 183.5678, GNorm = 0.2790, lr_0 = 1.5419e-04
Loss = 1.0441e-02, PNorm = 183.5692, GNorm = 0.2525, lr_0 = 1.5409e-04
Loss = 4.4313e-03, PNorm = 183.5711, GNorm = 0.0978, lr_0 = 1.5398e-04
Loss = 2.8458e-03, PNorm = 183.5735, GNorm = 0.2891, lr_0 = 1.5388e-04
Loss = 2.6922e-03, PNorm = 183.5761, GNorm = 0.2092, lr_0 = 1.5377e-04
Loss = 4.1994e-03, PNorm = 183.5792, GNorm = 0.1451, lr_0 = 1.5367e-04
Loss = 1.8639e-03, PNorm = 183.5835, GNorm = 0.1238, lr_0 = 1.5356e-04
Loss = 2.8348e-03, PNorm = 183.5855, GNorm = 0.1384, lr_0 = 1.5346e-04
Loss = 1.7961e-03, PNorm = 183.5870, GNorm = 0.0907, lr_0 = 1.5335e-04
Loss = 3.2500e-03, PNorm = 183.5882, GNorm = 0.1656, lr_0 = 1.5325e-04
Loss = 1.8839e-03, PNorm = 183.5899, GNorm = 0.0793, lr_0 = 1.5314e-04
Loss = 2.9305e-03, PNorm = 183.5922, GNorm = 0.0648, lr_0 = 1.5304e-04
Loss = 6.1530e-03, PNorm = 183.5937, GNorm = 0.0932, lr_0 = 1.5293e-04
Loss = 3.8033e-03, PNorm = 183.5962, GNorm = 0.0635, lr_0 = 1.5283e-04
Loss = 8.0436e-03, PNorm = 183.5979, GNorm = 0.0783, lr_0 = 1.5272e-04
Loss = 2.3201e-03, PNorm = 183.6019, GNorm = 0.0600, lr_0 = 1.5262e-04
Loss = 2.1421e-03, PNorm = 183.6051, GNorm = 0.1722, lr_0 = 1.5251e-04
Loss = 2.8516e-03, PNorm = 183.6078, GNorm = 0.1731, lr_0 = 1.5241e-04
Loss = 2.1935e-03, PNorm = 183.6121, GNorm = 0.4033, lr_0 = 1.5230e-04
Loss = 2.5152e-03, PNorm = 183.6159, GNorm = 0.0621, lr_0 = 1.5220e-04
Loss = 2.5237e-03, PNorm = 183.6193, GNorm = 0.0684, lr_0 = 1.5209e-04
Loss = 2.6688e-03, PNorm = 183.6226, GNorm = 0.2600, lr_0 = 1.5199e-04
Loss = 6.0170e-03, PNorm = 183.6267, GNorm = 0.2748, lr_0 = 1.5189e-04
Loss = 1.2012e-02, PNorm = 183.6302, GNorm = 0.1189, lr_0 = 1.5178e-04
Loss = 3.4715e-03, PNorm = 183.6326, GNorm = 0.1269, lr_0 = 1.5168e-04
Loss = 3.1371e-03, PNorm = 183.6358, GNorm = 0.1366, lr_0 = 1.5157e-04
Loss = 4.7250e-03, PNorm = 183.6390, GNorm = 0.1165, lr_0 = 1.5147e-04
Loss = 3.0595e-03, PNorm = 183.6434, GNorm = 0.0779, lr_0 = 1.5137e-04
Loss = 2.6459e-03, PNorm = 183.6481, GNorm = 0.0704, lr_0 = 1.5126e-04
Loss = 2.7056e-03, PNorm = 183.6507, GNorm = 0.0814, lr_0 = 1.5116e-04
Loss = 3.7498e-03, PNorm = 183.6538, GNorm = 0.1269, lr_0 = 1.5106e-04
Loss = 5.0829e-03, PNorm = 183.6562, GNorm = 0.1900, lr_0 = 1.5095e-04
Loss = 4.1845e-03, PNorm = 183.6576, GNorm = 0.1670, lr_0 = 1.5085e-04
Validation mae = 0.121121
Epoch 25
Loss = 2.8319e-03, PNorm = 183.6589, GNorm = 0.0515, lr_0 = 1.5075e-04
Loss = 2.4706e-03, PNorm = 183.6607, GNorm = 0.0955, lr_0 = 1.5064e-04
Loss = 2.8444e-03, PNorm = 183.6634, GNorm = 0.1439, lr_0 = 1.5054e-04
Loss = 1.8684e-03, PNorm = 183.6651, GNorm = 0.1351, lr_0 = 1.5044e-04
Loss = 3.7846e-03, PNorm = 183.6669, GNorm = 0.0699, lr_0 = 1.5033e-04
Loss = 3.6558e-03, PNorm = 183.6686, GNorm = 0.0944, lr_0 = 1.5023e-04
Loss = 2.1170e-03, PNorm = 183.6726, GNorm = 0.0808, lr_0 = 1.5013e-04
Loss = 4.6089e-03, PNorm = 183.6765, GNorm = 0.0646, lr_0 = 1.5002e-04
Loss = 2.8482e-03, PNorm = 183.6788, GNorm = 0.2103, lr_0 = 1.4992e-04
Loss = 5.2065e-03, PNorm = 183.6808, GNorm = 0.0745, lr_0 = 1.4982e-04
Loss = 3.1305e-03, PNorm = 183.6827, GNorm = 0.0697, lr_0 = 1.4972e-04
Loss = 5.0844e-03, PNorm = 183.6843, GNorm = 0.1507, lr_0 = 1.4961e-04
Loss = 2.6815e-03, PNorm = 183.6863, GNorm = 0.1134, lr_0 = 1.4951e-04
Loss = 1.8414e-03, PNorm = 183.6881, GNorm = 0.1374, lr_0 = 1.4941e-04
Loss = 4.4028e-03, PNorm = 183.6899, GNorm = 0.0924, lr_0 = 1.4931e-04
Loss = 1.6741e-03, PNorm = 183.6913, GNorm = 0.1050, lr_0 = 1.4920e-04
Loss = 2.1976e-03, PNorm = 183.6929, GNorm = 0.0916, lr_0 = 1.4910e-04
Loss = 1.6398e-03, PNorm = 183.6947, GNorm = 0.0649, lr_0 = 1.4900e-04
Loss = 5.7249e-03, PNorm = 183.6954, GNorm = 0.2726, lr_0 = 1.4890e-04
Loss = 5.6601e-03, PNorm = 183.6962, GNorm = 0.0905, lr_0 = 1.4880e-04
Loss = 4.2303e-03, PNorm = 183.6972, GNorm = 0.1055, lr_0 = 1.4869e-04
Loss = 3.4469e-03, PNorm = 183.6973, GNorm = 0.1178, lr_0 = 1.4859e-04
Loss = 2.3106e-03, PNorm = 183.7006, GNorm = 0.0691, lr_0 = 1.4849e-04
Loss = 4.6453e-03, PNorm = 183.7042, GNorm = 0.1090, lr_0 = 1.4839e-04
Loss = 5.8753e-03, PNorm = 183.7060, GNorm = 1.5358, lr_0 = 1.4829e-04
Loss = 4.2336e-03, PNorm = 183.7069, GNorm = 0.2409, lr_0 = 1.4818e-04
Loss = 1.6184e-02, PNorm = 183.7116, GNorm = 0.2016, lr_0 = 1.4808e-04
Loss = 2.4654e-03, PNorm = 183.7129, GNorm = 0.1270, lr_0 = 1.4798e-04
Loss = 2.5299e-03, PNorm = 183.7154, GNorm = 0.0519, lr_0 = 1.4788e-04
Loss = 4.6877e-03, PNorm = 183.7184, GNorm = 0.0822, lr_0 = 1.4778e-04
Loss = 4.6131e-03, PNorm = 183.7180, GNorm = 0.0617, lr_0 = 1.4768e-04
Loss = 2.5390e-03, PNorm = 183.7207, GNorm = 0.0903, lr_0 = 1.4758e-04
Loss = 3.1043e-03, PNorm = 183.7236, GNorm = 0.0535, lr_0 = 1.4748e-04
Loss = 3.2482e-03, PNorm = 183.7268, GNorm = 0.1369, lr_0 = 1.4737e-04
Loss = 2.4609e-03, PNorm = 183.7308, GNorm = 0.2839, lr_0 = 1.4727e-04
Loss = 1.6776e-03, PNorm = 183.7327, GNorm = 0.1165, lr_0 = 1.4717e-04
Loss = 4.0397e-03, PNorm = 183.7327, GNorm = 0.1895, lr_0 = 1.4707e-04
Loss = 3.9301e-03, PNorm = 183.7347, GNorm = 0.1014, lr_0 = 1.4697e-04
Loss = 4.2565e-03, PNorm = 183.7370, GNorm = 0.1911, lr_0 = 1.4687e-04
Loss = 3.7353e-03, PNorm = 183.7400, GNorm = 0.0861, lr_0 = 1.4677e-04
Loss = 3.2331e-03, PNorm = 183.7424, GNorm = 0.1180, lr_0 = 1.4667e-04
Loss = 3.2306e-03, PNorm = 183.7441, GNorm = 0.4480, lr_0 = 1.4657e-04
Loss = 3.2032e-03, PNorm = 183.7470, GNorm = 0.0462, lr_0 = 1.4647e-04
Loss = 3.9301e-03, PNorm = 183.7500, GNorm = 0.0586, lr_0 = 1.4637e-04
Loss = 5.6169e-03, PNorm = 183.7520, GNorm = 0.1140, lr_0 = 1.4627e-04
Loss = 2.6659e-03, PNorm = 183.7541, GNorm = 0.1996, lr_0 = 1.4617e-04
Loss = 4.4670e-03, PNorm = 183.7559, GNorm = 0.2602, lr_0 = 1.4607e-04
Loss = 4.9897e-03, PNorm = 183.7567, GNorm = 0.0809, lr_0 = 1.4597e-04
Loss = 1.6059e-03, PNorm = 183.7579, GNorm = 0.0854, lr_0 = 1.4587e-04
Loss = 4.7767e-03, PNorm = 183.7594, GNorm = 0.0921, lr_0 = 1.4577e-04
Loss = 5.5485e-03, PNorm = 183.7614, GNorm = 0.2813, lr_0 = 1.4567e-04
Loss = 4.3023e-03, PNorm = 183.7644, GNorm = 0.1610, lr_0 = 1.4557e-04
Loss = 1.8900e-03, PNorm = 183.7657, GNorm = 0.1508, lr_0 = 1.4547e-04
Loss = 2.6309e-03, PNorm = 183.7673, GNorm = 0.1984, lr_0 = 1.4537e-04
Loss = 4.1362e-03, PNorm = 183.7703, GNorm = 0.1256, lr_0 = 1.4527e-04
Loss = 5.8998e-03, PNorm = 183.7739, GNorm = 0.1774, lr_0 = 1.4517e-04
Loss = 3.4638e-03, PNorm = 183.7763, GNorm = 0.1186, lr_0 = 1.4507e-04
Loss = 5.3162e-03, PNorm = 183.7784, GNorm = 0.2572, lr_0 = 1.4497e-04
Loss = 1.6627e-03, PNorm = 183.7815, GNorm = 0.1496, lr_0 = 1.4487e-04
Loss = 3.7138e-03, PNorm = 183.7845, GNorm = 0.1625, lr_0 = 1.4477e-04
Loss = 2.9486e-03, PNorm = 183.7868, GNorm = 0.1213, lr_0 = 1.4467e-04
Loss = 2.2907e-03, PNorm = 183.7894, GNorm = 0.1327, lr_0 = 1.4457e-04
Loss = 6.5429e-03, PNorm = 183.7937, GNorm = 0.1315, lr_0 = 1.4447e-04
Loss = 4.5187e-03, PNorm = 183.7960, GNorm = 0.4810, lr_0 = 1.4438e-04
Loss = 4.9780e-03, PNorm = 183.7977, GNorm = 0.1287, lr_0 = 1.4428e-04
Loss = 1.9502e-03, PNorm = 183.7992, GNorm = 0.0589, lr_0 = 1.4418e-04
Loss = 3.6522e-03, PNorm = 183.8001, GNorm = 0.1015, lr_0 = 1.4408e-04
Loss = 3.4791e-03, PNorm = 183.8012, GNorm = 0.1580, lr_0 = 1.4398e-04
Loss = 5.3555e-03, PNorm = 183.8018, GNorm = 0.1051, lr_0 = 1.4388e-04
Loss = 1.6124e-03, PNorm = 183.8043, GNorm = 0.0712, lr_0 = 1.4378e-04
Loss = 2.2340e-03, PNorm = 183.8069, GNorm = 0.1027, lr_0 = 1.4368e-04
Loss = 4.8091e-03, PNorm = 183.8089, GNorm = 0.0851, lr_0 = 1.4359e-04
Loss = 2.5783e-03, PNorm = 183.8103, GNorm = 0.0677, lr_0 = 1.4349e-04
Loss = 6.5346e-03, PNorm = 183.8116, GNorm = 0.1627, lr_0 = 1.4339e-04
Loss = 2.3357e-03, PNorm = 183.8142, GNorm = 0.0765, lr_0 = 1.4329e-04
Loss = 7.1376e-03, PNorm = 183.8164, GNorm = 1.2091, lr_0 = 1.4319e-04
Loss = 2.5166e-03, PNorm = 183.8176, GNorm = 0.0742, lr_0 = 1.4310e-04
Loss = 2.9780e-03, PNorm = 183.8198, GNorm = 0.1468, lr_0 = 1.4300e-04
Loss = 4.4116e-03, PNorm = 183.8216, GNorm = 0.0698, lr_0 = 1.4290e-04
Loss = 4.4131e-03, PNorm = 183.8239, GNorm = 0.1798, lr_0 = 1.4280e-04
Loss = 2.5532e-03, PNorm = 183.8271, GNorm = 0.0489, lr_0 = 1.4270e-04
Loss = 4.1034e-03, PNorm = 183.8313, GNorm = 0.5672, lr_0 = 1.4261e-04
Loss = 1.8846e-03, PNorm = 183.8346, GNorm = 0.1217, lr_0 = 1.4251e-04
Loss = 7.3244e-03, PNorm = 183.8363, GNorm = 0.0974, lr_0 = 1.4241e-04
Loss = 7.0477e-03, PNorm = 183.8385, GNorm = 0.5476, lr_0 = 1.4231e-04
Loss = 2.9444e-03, PNorm = 183.8398, GNorm = 0.0580, lr_0 = 1.4222e-04
Loss = 4.9226e-03, PNorm = 183.8419, GNorm = 0.2333, lr_0 = 1.4212e-04
Loss = 2.2300e-03, PNorm = 183.8442, GNorm = 0.2200, lr_0 = 1.4202e-04
Loss = 6.0328e-03, PNorm = 183.8463, GNorm = 0.3099, lr_0 = 1.4192e-04
Loss = 4.1305e-03, PNorm = 183.8498, GNorm = 0.1021, lr_0 = 1.4183e-04
Loss = 5.4987e-03, PNorm = 183.8524, GNorm = 0.1771, lr_0 = 1.4173e-04
Loss = 2.8543e-03, PNorm = 183.8560, GNorm = 0.1749, lr_0 = 1.4163e-04
Loss = 2.5641e-03, PNorm = 183.8587, GNorm = 0.3792, lr_0 = 1.4153e-04
Loss = 1.6895e-03, PNorm = 183.8620, GNorm = 0.0795, lr_0 = 1.4144e-04
Loss = 6.9295e-03, PNorm = 183.8639, GNorm = 0.5327, lr_0 = 1.4134e-04
Loss = 4.4695e-03, PNorm = 183.8651, GNorm = 0.1205, lr_0 = 1.4124e-04
Loss = 2.8080e-03, PNorm = 183.8667, GNorm = 0.0583, lr_0 = 1.4115e-04
Loss = 7.7434e-03, PNorm = 183.8687, GNorm = 0.1376, lr_0 = 1.4105e-04
Loss = 3.1609e-03, PNorm = 183.8703, GNorm = 0.1877, lr_0 = 1.4095e-04
Loss = 3.0678e-03, PNorm = 183.8728, GNorm = 0.1395, lr_0 = 1.4086e-04
Loss = 1.9368e-03, PNorm = 183.8754, GNorm = 0.0721, lr_0 = 1.4076e-04
Loss = 3.1902e-03, PNorm = 183.8770, GNorm = 0.1102, lr_0 = 1.4066e-04
Loss = 5.8502e-03, PNorm = 183.8790, GNorm = 0.0779, lr_0 = 1.4057e-04
Loss = 8.7940e-03, PNorm = 183.8813, GNorm = 0.4190, lr_0 = 1.4047e-04
Loss = 2.5471e-03, PNorm = 183.8847, GNorm = 0.0752, lr_0 = 1.4038e-04
Loss = 3.4451e-03, PNorm = 183.8868, GNorm = 0.1782, lr_0 = 1.4028e-04
Loss = 3.0460e-03, PNorm = 183.8900, GNorm = 0.0987, lr_0 = 1.4018e-04
Loss = 2.3560e-03, PNorm = 183.8916, GNorm = 0.1126, lr_0 = 1.4009e-04
Loss = 2.3103e-03, PNorm = 183.8940, GNorm = 0.2737, lr_0 = 1.3999e-04
Loss = 5.0857e-03, PNorm = 183.8969, GNorm = 0.7488, lr_0 = 1.3990e-04
Loss = 2.3615e-03, PNorm = 183.9004, GNorm = 0.0610, lr_0 = 1.3980e-04
Loss = 4.2767e-03, PNorm = 183.9030, GNorm = 0.1408, lr_0 = 1.3970e-04
Loss = 7.9657e-03, PNorm = 183.9061, GNorm = 0.0412, lr_0 = 1.3961e-04
Loss = 3.9217e-03, PNorm = 183.9084, GNorm = 0.1317, lr_0 = 1.3951e-04
Loss = 7.4540e-03, PNorm = 183.9106, GNorm = 0.1080, lr_0 = 1.3942e-04
Loss = 1.8128e-03, PNorm = 183.9137, GNorm = 0.0772, lr_0 = 1.3932e-04
Loss = 7.4075e-03, PNorm = 183.9157, GNorm = 0.6268, lr_0 = 1.3923e-04
Loss = 4.5087e-03, PNorm = 183.9189, GNorm = 0.1609, lr_0 = 1.3913e-04
Loss = 2.1093e-03, PNorm = 183.9223, GNorm = 0.2158, lr_0 = 1.3904e-04
Loss = 1.9877e-03, PNorm = 183.9236, GNorm = 0.0561, lr_0 = 1.3894e-04
Validation mae = 0.121037
Epoch 26
Loss = 3.3044e-03, PNorm = 183.9250, GNorm = 0.1450, lr_0 = 1.3884e-04
Loss = 2.4627e-03, PNorm = 183.9257, GNorm = 0.0749, lr_0 = 1.3875e-04
Loss = 2.2043e-03, PNorm = 183.9263, GNorm = 0.1298, lr_0 = 1.3865e-04
Loss = 3.7460e-03, PNorm = 183.9266, GNorm = 0.1413, lr_0 = 1.3856e-04
Loss = 4.3872e-03, PNorm = 183.9283, GNorm = 0.1065, lr_0 = 1.3846e-04
Loss = 2.0122e-03, PNorm = 183.9301, GNorm = 0.1219, lr_0 = 1.3837e-04
Loss = 2.1096e-03, PNorm = 183.9308, GNorm = 0.0972, lr_0 = 1.3828e-04
Loss = 2.9139e-03, PNorm = 183.9313, GNorm = 0.0620, lr_0 = 1.3818e-04
Loss = 3.9762e-03, PNorm = 183.9330, GNorm = 0.0534, lr_0 = 1.3809e-04
Loss = 1.5242e-03, PNorm = 183.9343, GNorm = 0.1454, lr_0 = 1.3799e-04
Loss = 2.6586e-03, PNorm = 183.9370, GNorm = 0.2832, lr_0 = 1.3790e-04
Loss = 4.5622e-03, PNorm = 183.9376, GNorm = 0.1191, lr_0 = 1.3780e-04
Loss = 2.4535e-03, PNorm = 183.9402, GNorm = 0.1347, lr_0 = 1.3771e-04
Loss = 2.9805e-03, PNorm = 183.9417, GNorm = 0.0810, lr_0 = 1.3761e-04
Loss = 3.3781e-03, PNorm = 183.9442, GNorm = 0.0540, lr_0 = 1.3752e-04
Loss = 2.1401e-03, PNorm = 183.9459, GNorm = 0.1160, lr_0 = 1.3742e-04
Loss = 5.2535e-03, PNorm = 183.9473, GNorm = 0.0698, lr_0 = 1.3733e-04
Loss = 5.3977e-03, PNorm = 183.9500, GNorm = 0.0574, lr_0 = 1.3724e-04
Loss = 2.1255e-03, PNorm = 183.9528, GNorm = 0.1690, lr_0 = 1.3714e-04
Loss = 5.6261e-03, PNorm = 183.9537, GNorm = 0.0705, lr_0 = 1.3705e-04
Loss = 1.7230e-03, PNorm = 183.9554, GNorm = 0.2215, lr_0 = 1.3695e-04
Loss = 3.4407e-03, PNorm = 183.9576, GNorm = 0.2086, lr_0 = 1.3686e-04
Loss = 3.7166e-03, PNorm = 183.9602, GNorm = 0.0478, lr_0 = 1.3677e-04
Loss = 1.8081e-03, PNorm = 183.9635, GNorm = 0.1717, lr_0 = 1.3667e-04
Loss = 2.7750e-03, PNorm = 183.9654, GNorm = 0.0502, lr_0 = 1.3658e-04
Loss = 2.9464e-03, PNorm = 183.9665, GNorm = 0.1627, lr_0 = 1.3649e-04
Loss = 4.6880e-03, PNorm = 183.9672, GNorm = 0.0673, lr_0 = 1.3639e-04
Loss = 4.3527e-03, PNorm = 183.9685, GNorm = 0.1371, lr_0 = 1.3630e-04
Loss = 1.6249e-03, PNorm = 183.9711, GNorm = 0.0867, lr_0 = 1.3621e-04
Loss = 2.0137e-03, PNorm = 183.9722, GNorm = 0.0737, lr_0 = 1.3611e-04
Loss = 6.3006e-03, PNorm = 183.9735, GNorm = 0.1550, lr_0 = 1.3602e-04
Loss = 1.7019e-03, PNorm = 183.9744, GNorm = 0.1009, lr_0 = 1.3593e-04
Loss = 1.8069e-03, PNorm = 183.9750, GNorm = 0.0520, lr_0 = 1.3583e-04
Loss = 2.2170e-03, PNorm = 183.9763, GNorm = 0.1450, lr_0 = 1.3574e-04
Loss = 2.4367e-03, PNorm = 183.9778, GNorm = 0.1528, lr_0 = 1.3565e-04
Loss = 4.2016e-03, PNorm = 183.9803, GNorm = 0.0883, lr_0 = 1.3555e-04
Loss = 2.1498e-03, PNorm = 183.9819, GNorm = 0.0530, lr_0 = 1.3546e-04
Loss = 2.1357e-03, PNorm = 183.9824, GNorm = 0.1299, lr_0 = 1.3537e-04
Loss = 1.9929e-03, PNorm = 183.9843, GNorm = 0.0952, lr_0 = 1.3528e-04
Loss = 1.7843e-03, PNorm = 183.9856, GNorm = 0.0858, lr_0 = 1.3518e-04
Loss = 3.7679e-03, PNorm = 183.9866, GNorm = 0.0636, lr_0 = 1.3509e-04
Loss = 1.6717e-03, PNorm = 183.9886, GNorm = 0.2380, lr_0 = 1.3500e-04
Loss = 4.2222e-03, PNorm = 183.9905, GNorm = 0.0611, lr_0 = 1.3491e-04
Loss = 2.8903e-03, PNorm = 183.9925, GNorm = 0.0827, lr_0 = 1.3481e-04
Loss = 2.8621e-03, PNorm = 183.9929, GNorm = 0.1385, lr_0 = 1.3472e-04
Loss = 6.5500e-03, PNorm = 183.9945, GNorm = 0.1501, lr_0 = 1.3463e-04
Loss = 3.7597e-03, PNorm = 183.9976, GNorm = 0.2402, lr_0 = 1.3454e-04
Loss = 3.4983e-03, PNorm = 184.0006, GNorm = 0.1263, lr_0 = 1.3444e-04
Loss = 3.7046e-03, PNorm = 184.0029, GNorm = 0.1250, lr_0 = 1.3435e-04
Loss = 1.3906e-03, PNorm = 184.0052, GNorm = 0.0622, lr_0 = 1.3426e-04
Loss = 3.9277e-03, PNorm = 184.0076, GNorm = 0.1175, lr_0 = 1.3417e-04
Loss = 2.2145e-03, PNorm = 184.0101, GNorm = 0.1172, lr_0 = 1.3408e-04
Loss = 6.2387e-03, PNorm = 184.0112, GNorm = 0.1198, lr_0 = 1.3398e-04
Loss = 6.5674e-03, PNorm = 184.0128, GNorm = 0.1356, lr_0 = 1.3389e-04
Loss = 7.1643e-03, PNorm = 184.0166, GNorm = 0.6164, lr_0 = 1.3380e-04
Loss = 2.8344e-03, PNorm = 184.0174, GNorm = 0.0499, lr_0 = 1.3371e-04
Loss = 3.0012e-03, PNorm = 184.0189, GNorm = 0.1278, lr_0 = 1.3362e-04
Loss = 2.1363e-03, PNorm = 184.0206, GNorm = 0.0747, lr_0 = 1.3353e-04
Loss = 2.6371e-03, PNorm = 184.0200, GNorm = 0.0582, lr_0 = 1.3343e-04
Loss = 6.4433e-03, PNorm = 184.0204, GNorm = 0.0953, lr_0 = 1.3334e-04
Loss = 5.8913e-03, PNorm = 184.0221, GNorm = 0.2073, lr_0 = 1.3325e-04
Loss = 6.8849e-03, PNorm = 184.0240, GNorm = 0.3771, lr_0 = 1.3316e-04
Loss = 6.7807e-03, PNorm = 184.0259, GNorm = 0.0596, lr_0 = 1.3307e-04
Loss = 1.8203e-03, PNorm = 184.0287, GNorm = 0.1759, lr_0 = 1.3298e-04
Loss = 1.4950e-03, PNorm = 184.0316, GNorm = 0.0820, lr_0 = 1.3289e-04
Loss = 5.8537e-03, PNorm = 184.0335, GNorm = 0.0724, lr_0 = 1.3280e-04
Loss = 3.6887e-03, PNorm = 184.0349, GNorm = 0.2717, lr_0 = 1.3270e-04
Loss = 5.1040e-03, PNorm = 184.0373, GNorm = 0.0921, lr_0 = 1.3261e-04
Loss = 2.0816e-03, PNorm = 184.0398, GNorm = 0.0819, lr_0 = 1.3252e-04
Loss = 1.7659e-03, PNorm = 184.0423, GNorm = 0.0678, lr_0 = 1.3243e-04
Loss = 5.8658e-03, PNorm = 184.0449, GNorm = 0.0739, lr_0 = 1.3234e-04
Loss = 4.2665e-03, PNorm = 184.0471, GNorm = 0.0708, lr_0 = 1.3225e-04
Loss = 5.3386e-03, PNorm = 184.0484, GNorm = 0.1410, lr_0 = 1.3216e-04
Loss = 2.1818e-03, PNorm = 184.0510, GNorm = 0.2312, lr_0 = 1.3207e-04
Loss = 3.8300e-03, PNorm = 184.0527, GNorm = 0.0363, lr_0 = 1.3198e-04
Loss = 4.4572e-03, PNorm = 184.0553, GNorm = 0.2921, lr_0 = 1.3189e-04
Loss = 3.6780e-03, PNorm = 184.0567, GNorm = 0.2609, lr_0 = 1.3180e-04
Loss = 2.2122e-03, PNorm = 184.0584, GNorm = 0.0885, lr_0 = 1.3171e-04
Loss = 4.3962e-03, PNorm = 184.0596, GNorm = 0.1838, lr_0 = 1.3162e-04
Loss = 2.1941e-03, PNorm = 184.0612, GNorm = 0.0983, lr_0 = 1.3153e-04
Loss = 3.9937e-03, PNorm = 184.0631, GNorm = 0.2782, lr_0 = 1.3144e-04
Loss = 6.2814e-03, PNorm = 184.0663, GNorm = 0.1057, lr_0 = 1.3135e-04
Loss = 2.4977e-03, PNorm = 184.0691, GNorm = 0.1232, lr_0 = 1.3126e-04
Loss = 8.4246e-03, PNorm = 184.0709, GNorm = 0.0807, lr_0 = 1.3117e-04
Loss = 2.0001e-03, PNorm = 184.0716, GNorm = 0.1785, lr_0 = 1.3108e-04
Loss = 2.9628e-03, PNorm = 184.0747, GNorm = 0.1308, lr_0 = 1.3099e-04
Loss = 2.7961e-03, PNorm = 184.0767, GNorm = 0.0937, lr_0 = 1.3090e-04
Loss = 2.0503e-03, PNorm = 184.0780, GNorm = 0.0955, lr_0 = 1.3081e-04
Loss = 5.1409e-03, PNorm = 184.0804, GNorm = 0.2464, lr_0 = 1.3072e-04
Loss = 1.8862e-03, PNorm = 184.0822, GNorm = 0.1410, lr_0 = 1.3063e-04
Loss = 2.1824e-03, PNorm = 184.0844, GNorm = 0.2034, lr_0 = 1.3054e-04
Loss = 3.0524e-03, PNorm = 184.0874, GNorm = 0.1477, lr_0 = 1.3045e-04
Loss = 5.8971e-03, PNorm = 184.0881, GNorm = 0.1754, lr_0 = 1.3036e-04
Loss = 2.3391e-03, PNorm = 184.0899, GNorm = 0.0599, lr_0 = 1.3027e-04
Loss = 3.1912e-03, PNorm = 184.0924, GNorm = 0.1349, lr_0 = 1.3018e-04
Loss = 2.0866e-03, PNorm = 184.0936, GNorm = 0.0823, lr_0 = 1.3009e-04
Loss = 4.2983e-03, PNorm = 184.0958, GNorm = 0.1167, lr_0 = 1.3000e-04
Loss = 2.6128e-03, PNorm = 184.0968, GNorm = 0.0665, lr_0 = 1.2992e-04
Loss = 3.4685e-03, PNorm = 184.0973, GNorm = 0.1199, lr_0 = 1.2983e-04
Loss = 3.9452e-03, PNorm = 184.1001, GNorm = 0.0727, lr_0 = 1.2974e-04
Loss = 4.4813e-03, PNorm = 184.1035, GNorm = 0.1158, lr_0 = 1.2965e-04
Loss = 2.2227e-03, PNorm = 184.1060, GNorm = 0.0729, lr_0 = 1.2956e-04
Loss = 2.1806e-03, PNorm = 184.1081, GNorm = 0.0660, lr_0 = 1.2947e-04
Loss = 3.2893e-03, PNorm = 184.1099, GNorm = 0.0969, lr_0 = 1.2938e-04
Loss = 5.2844e-03, PNorm = 184.1131, GNorm = 0.0934, lr_0 = 1.2929e-04
Loss = 1.7019e-02, PNorm = 184.1168, GNorm = 0.1236, lr_0 = 1.2921e-04
Loss = 3.5600e-03, PNorm = 184.1192, GNorm = 0.0869, lr_0 = 1.2912e-04
Loss = 3.3917e-03, PNorm = 184.1225, GNorm = 0.2482, lr_0 = 1.2903e-04
Loss = 4.4247e-03, PNorm = 184.1242, GNorm = 0.1260, lr_0 = 1.2894e-04
Loss = 6.4707e-03, PNorm = 184.1291, GNorm = 0.2371, lr_0 = 1.2885e-04
Loss = 1.7713e-03, PNorm = 184.1331, GNorm = 0.0512, lr_0 = 1.2876e-04
Loss = 3.2628e-03, PNorm = 184.1348, GNorm = 0.0651, lr_0 = 1.2867e-04
Loss = 2.2215e-03, PNorm = 184.1350, GNorm = 0.1202, lr_0 = 1.2859e-04
Loss = 1.7737e-03, PNorm = 184.1350, GNorm = 0.0731, lr_0 = 1.2850e-04
Loss = 4.1438e-03, PNorm = 184.1349, GNorm = 0.2269, lr_0 = 1.2841e-04
Loss = 3.2064e-03, PNorm = 184.1358, GNorm = 0.1820, lr_0 = 1.2832e-04
Loss = 1.3986e-03, PNorm = 184.1381, GNorm = 0.0990, lr_0 = 1.2823e-04
Loss = 5.5379e-03, PNorm = 184.1397, GNorm = 0.1056, lr_0 = 1.2815e-04
Loss = 2.5920e-03, PNorm = 184.1400, GNorm = 0.1151, lr_0 = 1.2806e-04
Loss = 1.9099e-03, PNorm = 184.1400, GNorm = 0.0461, lr_0 = 1.2797e-04
Validation mae = 0.120991
Epoch 27
Loss = 5.4465e-03, PNorm = 184.1430, GNorm = 0.0710, lr_0 = 1.2788e-04
Loss = 2.2630e-03, PNorm = 184.1439, GNorm = 0.0834, lr_0 = 1.2780e-04
Loss = 2.7691e-03, PNorm = 184.1448, GNorm = 0.0842, lr_0 = 1.2771e-04
Loss = 3.0740e-03, PNorm = 184.1455, GNorm = 0.8322, lr_0 = 1.2762e-04
Loss = 5.9019e-03, PNorm = 184.1477, GNorm = 0.2965, lr_0 = 1.2753e-04
Loss = 6.9611e-03, PNorm = 184.1493, GNorm = 0.1773, lr_0 = 1.2745e-04
Loss = 4.4969e-03, PNorm = 184.1505, GNorm = 0.0634, lr_0 = 1.2736e-04
Loss = 1.4271e-03, PNorm = 184.1515, GNorm = 0.1436, lr_0 = 1.2727e-04
Loss = 1.9139e-03, PNorm = 184.1532, GNorm = 0.1336, lr_0 = 1.2718e-04
Loss = 1.7365e-03, PNorm = 184.1550, GNorm = 0.0891, lr_0 = 1.2710e-04
Loss = 2.9485e-03, PNorm = 184.1568, GNorm = 0.1601, lr_0 = 1.2701e-04
Loss = 2.1611e-03, PNorm = 184.1591, GNorm = 0.1714, lr_0 = 1.2692e-04
Loss = 2.7163e-03, PNorm = 184.1615, GNorm = 0.2439, lr_0 = 1.2684e-04
Loss = 2.2401e-03, PNorm = 184.1630, GNorm = 0.1449, lr_0 = 1.2675e-04
Loss = 1.5452e-03, PNorm = 184.1639, GNorm = 0.1262, lr_0 = 1.2666e-04
Loss = 4.2679e-03, PNorm = 184.1659, GNorm = 0.1227, lr_0 = 1.2658e-04
Loss = 2.1849e-03, PNorm = 184.1679, GNorm = 0.0848, lr_0 = 1.2649e-04
Loss = 1.5515e-03, PNorm = 184.1698, GNorm = 0.2651, lr_0 = 1.2640e-04
Loss = 4.6644e-03, PNorm = 184.1709, GNorm = 0.6471, lr_0 = 1.2632e-04
Loss = 4.4059e-03, PNorm = 184.1728, GNorm = 0.1699, lr_0 = 1.2623e-04
Loss = 1.4349e-03, PNorm = 184.1731, GNorm = 0.0916, lr_0 = 1.2614e-04
Loss = 7.6211e-03, PNorm = 184.1744, GNorm = 0.0595, lr_0 = 1.2606e-04
Loss = 2.9823e-03, PNorm = 184.1748, GNorm = 0.0573, lr_0 = 1.2597e-04
Loss = 4.0601e-03, PNorm = 184.1760, GNorm = 0.0460, lr_0 = 1.2588e-04
Loss = 2.8946e-03, PNorm = 184.1782, GNorm = 0.1204, lr_0 = 1.2580e-04
Loss = 3.7257e-03, PNorm = 184.1787, GNorm = 0.0474, lr_0 = 1.2571e-04
Loss = 3.0109e-03, PNorm = 184.1811, GNorm = 0.0783, lr_0 = 1.2563e-04
Loss = 2.9732e-03, PNorm = 184.1817, GNorm = 0.0575, lr_0 = 1.2554e-04
Loss = 3.2392e-03, PNorm = 184.1829, GNorm = 0.0923, lr_0 = 1.2545e-04
Loss = 1.5576e-03, PNorm = 184.1835, GNorm = 0.0870, lr_0 = 1.2537e-04
Loss = 1.4000e-03, PNorm = 184.1840, GNorm = 0.0575, lr_0 = 1.2528e-04
Loss = 2.2947e-03, PNorm = 184.1848, GNorm = 0.1077, lr_0 = 1.2520e-04
Loss = 1.6226e-03, PNorm = 184.1857, GNorm = 0.2404, lr_0 = 1.2511e-04
Loss = 1.8311e-03, PNorm = 184.1868, GNorm = 0.1758, lr_0 = 1.2502e-04
Loss = 1.7131e-03, PNorm = 184.1880, GNorm = 0.0934, lr_0 = 1.2494e-04
Loss = 2.0663e-03, PNorm = 184.1888, GNorm = 0.0630, lr_0 = 1.2485e-04
Loss = 1.8056e-03, PNorm = 184.1902, GNorm = 0.1275, lr_0 = 1.2477e-04
Loss = 1.2895e-03, PNorm = 184.1923, GNorm = 0.0622, lr_0 = 1.2468e-04
Loss = 3.4772e-03, PNorm = 184.1937, GNorm = 0.2181, lr_0 = 1.2460e-04
Loss = 1.3267e-02, PNorm = 184.1945, GNorm = 0.4405, lr_0 = 1.2451e-04
Loss = 2.7001e-03, PNorm = 184.1946, GNorm = 0.3316, lr_0 = 1.2443e-04
Loss = 7.1389e-03, PNorm = 184.1952, GNorm = 0.1057, lr_0 = 1.2434e-04
Loss = 2.2859e-03, PNorm = 184.1967, GNorm = 0.0998, lr_0 = 1.2426e-04
Loss = 1.6469e-03, PNorm = 184.1975, GNorm = 0.0828, lr_0 = 1.2417e-04
Loss = 1.2326e-03, PNorm = 184.1983, GNorm = 0.1233, lr_0 = 1.2409e-04
Loss = 2.8030e-03, PNorm = 184.1996, GNorm = 0.1196, lr_0 = 1.2400e-04
Loss = 4.1604e-03, PNorm = 184.2014, GNorm = 0.0664, lr_0 = 1.2392e-04
Loss = 2.6849e-03, PNorm = 184.2038, GNorm = 0.3828, lr_0 = 1.2383e-04
Loss = 2.5287e-03, PNorm = 184.2061, GNorm = 0.0434, lr_0 = 1.2375e-04
Loss = 1.7897e-03, PNorm = 184.2069, GNorm = 0.1725, lr_0 = 1.2366e-04
Loss = 2.7208e-03, PNorm = 184.2074, GNorm = 0.1416, lr_0 = 1.2358e-04
Loss = 6.4743e-03, PNorm = 184.2087, GNorm = 0.0662, lr_0 = 1.2349e-04
Loss = 5.2094e-03, PNorm = 184.2094, GNorm = 0.0892, lr_0 = 1.2341e-04
Loss = 3.5841e-03, PNorm = 184.2113, GNorm = 0.2545, lr_0 = 1.2332e-04
Loss = 4.2796e-03, PNorm = 184.2137, GNorm = 0.1017, lr_0 = 1.2324e-04
Loss = 3.8737e-03, PNorm = 184.2156, GNorm = 0.0920, lr_0 = 1.2315e-04
Loss = 1.8609e-03, PNorm = 184.2166, GNorm = 0.0890, lr_0 = 1.2307e-04
Loss = 6.4388e-03, PNorm = 184.2179, GNorm = 0.6466, lr_0 = 1.2298e-04
Loss = 3.3248e-03, PNorm = 184.2205, GNorm = 0.1599, lr_0 = 1.2290e-04
Loss = 5.6858e-03, PNorm = 184.2244, GNorm = 0.0869, lr_0 = 1.2282e-04
Loss = 2.7228e-03, PNorm = 184.2278, GNorm = 0.0709, lr_0 = 1.2273e-04
Loss = 1.7110e-03, PNorm = 184.2311, GNorm = 0.0703, lr_0 = 1.2265e-04
Loss = 2.0727e-03, PNorm = 184.2343, GNorm = 0.0712, lr_0 = 1.2256e-04
Loss = 5.3514e-03, PNorm = 184.2357, GNorm = 0.1045, lr_0 = 1.2248e-04
Loss = 2.5948e-03, PNorm = 184.2373, GNorm = 0.2620, lr_0 = 1.2240e-04
Loss = 3.1751e-03, PNorm = 184.2384, GNorm = 0.1294, lr_0 = 1.2231e-04
Loss = 1.5463e-03, PNorm = 184.2386, GNorm = 0.0461, lr_0 = 1.2223e-04
Loss = 1.9300e-03, PNorm = 184.2397, GNorm = 0.1175, lr_0 = 1.2214e-04
Loss = 1.4580e-03, PNorm = 184.2411, GNorm = 0.0849, lr_0 = 1.2206e-04
Loss = 3.0684e-03, PNorm = 184.2418, GNorm = 0.0539, lr_0 = 1.2198e-04
Loss = 2.1872e-03, PNorm = 184.2424, GNorm = 0.0851, lr_0 = 1.2189e-04
Loss = 1.9273e-03, PNorm = 184.2434, GNorm = 0.0710, lr_0 = 1.2181e-04
Loss = 1.3090e-03, PNorm = 184.2449, GNorm = 0.1762, lr_0 = 1.2173e-04
Loss = 2.4435e-03, PNorm = 184.2463, GNorm = 0.1666, lr_0 = 1.2164e-04
Loss = 5.5592e-03, PNorm = 184.2477, GNorm = 0.1490, lr_0 = 1.2156e-04
Loss = 3.4349e-03, PNorm = 184.2485, GNorm = 0.2863, lr_0 = 1.2148e-04
Loss = 5.8960e-03, PNorm = 184.2503, GNorm = 0.2261, lr_0 = 1.2139e-04
Loss = 1.5014e-03, PNorm = 184.2531, GNorm = 0.1021, lr_0 = 1.2131e-04
Loss = 1.9517e-03, PNorm = 184.2551, GNorm = 0.1807, lr_0 = 1.2123e-04
Loss = 2.7547e-03, PNorm = 184.2576, GNorm = 0.1789, lr_0 = 1.2114e-04
Loss = 3.6358e-03, PNorm = 184.2599, GNorm = 0.0820, lr_0 = 1.2106e-04
Loss = 4.6993e-03, PNorm = 184.2619, GNorm = 0.1575, lr_0 = 1.2098e-04
Loss = 6.2443e-03, PNorm = 184.2630, GNorm = 0.0581, lr_0 = 1.2090e-04
Loss = 3.1940e-03, PNorm = 184.2656, GNorm = 0.0582, lr_0 = 1.2081e-04
Loss = 1.3676e-03, PNorm = 184.2679, GNorm = 0.1165, lr_0 = 1.2073e-04
Loss = 4.1478e-03, PNorm = 184.2682, GNorm = 0.1984, lr_0 = 1.2065e-04
Loss = 2.9311e-03, PNorm = 184.2706, GNorm = 0.0537, lr_0 = 1.2056e-04
Loss = 3.2951e-03, PNorm = 184.2736, GNorm = 0.1112, lr_0 = 1.2048e-04
Loss = 3.0842e-03, PNorm = 184.2766, GNorm = 0.0876, lr_0 = 1.2040e-04
Loss = 6.1089e-03, PNorm = 184.2799, GNorm = 0.1145, lr_0 = 1.2032e-04
Loss = 1.5669e-03, PNorm = 184.2825, GNorm = 0.0847, lr_0 = 1.2023e-04
Loss = 2.1106e-03, PNorm = 184.2845, GNorm = 0.1149, lr_0 = 1.2015e-04
Loss = 1.7049e-03, PNorm = 184.2867, GNorm = 0.0911, lr_0 = 1.2007e-04
Loss = 1.7842e-03, PNorm = 184.2889, GNorm = 0.0420, lr_0 = 1.1999e-04
Loss = 2.0771e-03, PNorm = 184.2903, GNorm = 0.2208, lr_0 = 1.1991e-04
Loss = 1.8194e-03, PNorm = 184.2913, GNorm = 0.1012, lr_0 = 1.1982e-04
Loss = 2.5818e-03, PNorm = 184.2932, GNorm = 0.1421, lr_0 = 1.1974e-04
Loss = 2.7780e-03, PNorm = 184.2948, GNorm = 0.1044, lr_0 = 1.1966e-04
Loss = 3.5923e-03, PNorm = 184.2960, GNorm = 0.0340, lr_0 = 1.1958e-04
Loss = 2.0237e-03, PNorm = 184.2978, GNorm = 0.1441, lr_0 = 1.1950e-04
Loss = 6.2145e-03, PNorm = 184.3003, GNorm = 0.0519, lr_0 = 1.1941e-04
Loss = 3.1146e-03, PNorm = 184.3017, GNorm = 0.0704, lr_0 = 1.1933e-04
Loss = 1.3729e-03, PNorm = 184.3031, GNorm = 0.1288, lr_0 = 1.1925e-04
Loss = 1.3158e-03, PNorm = 184.3043, GNorm = 0.1080, lr_0 = 1.1917e-04
Loss = 4.3000e-03, PNorm = 184.3064, GNorm = 0.1650, lr_0 = 1.1909e-04
Loss = 2.8463e-03, PNorm = 184.3077, GNorm = 0.0569, lr_0 = 1.1901e-04
Loss = 2.2069e-03, PNorm = 184.3092, GNorm = 0.0746, lr_0 = 1.1892e-04
Loss = 3.9402e-03, PNorm = 184.3111, GNorm = 0.1112, lr_0 = 1.1884e-04
Loss = 5.8974e-03, PNorm = 184.3123, GNorm = 0.0588, lr_0 = 1.1876e-04
Loss = 1.2270e-03, PNorm = 184.3134, GNorm = 0.1197, lr_0 = 1.1868e-04
Loss = 4.5093e-03, PNorm = 184.3152, GNorm = 0.0779, lr_0 = 1.1860e-04
Loss = 7.1367e-03, PNorm = 184.3157, GNorm = 0.0984, lr_0 = 1.1852e-04
Loss = 4.0489e-03, PNorm = 184.3177, GNorm = 0.0609, lr_0 = 1.1844e-04
Loss = 1.6423e-03, PNorm = 184.3194, GNorm = 0.0927, lr_0 = 1.1835e-04
Loss = 3.1843e-03, PNorm = 184.3220, GNorm = 0.0430, lr_0 = 1.1827e-04
Loss = 7.0463e-03, PNorm = 184.3235, GNorm = 0.0885, lr_0 = 1.1819e-04
Loss = 4.1306e-03, PNorm = 184.3250, GNorm = 0.0912, lr_0 = 1.1811e-04
Loss = 5.6676e-03, PNorm = 184.3272, GNorm = 0.1221, lr_0 = 1.1803e-04
Loss = 2.8218e-03, PNorm = 184.3289, GNorm = 0.1023, lr_0 = 1.1795e-04
Loss = 6.7811e-03, PNorm = 184.3297, GNorm = 0.1000, lr_0 = 1.1787e-04
Validation mae = 0.120738
Epoch 28
Loss = 2.3432e-03, PNorm = 184.3307, GNorm = 0.1809, lr_0 = 1.1779e-04
Loss = 2.2098e-03, PNorm = 184.3313, GNorm = 0.1030, lr_0 = 1.1771e-04
Loss = 1.5170e-03, PNorm = 184.3325, GNorm = 0.0643, lr_0 = 1.1763e-04
Loss = 1.5683e-03, PNorm = 184.3336, GNorm = 0.0524, lr_0 = 1.1755e-04
Loss = 2.0906e-03, PNorm = 184.3351, GNorm = 0.1088, lr_0 = 1.1747e-04
Loss = 1.4552e-03, PNorm = 184.3362, GNorm = 0.0660, lr_0 = 1.1739e-04
Loss = 2.0725e-03, PNorm = 184.3373, GNorm = 0.0354, lr_0 = 1.1730e-04
Loss = 1.6179e-03, PNorm = 184.3378, GNorm = 0.0953, lr_0 = 1.1722e-04
Loss = 3.9807e-03, PNorm = 184.3395, GNorm = 0.2922, lr_0 = 1.1714e-04
Loss = 2.6544e-03, PNorm = 184.3388, GNorm = 0.0319, lr_0 = 1.1706e-04
Loss = 3.8012e-03, PNorm = 184.3398, GNorm = 0.2078, lr_0 = 1.1698e-04
Loss = 1.2309e-03, PNorm = 184.3419, GNorm = 0.1947, lr_0 = 1.1690e-04
Loss = 3.8364e-03, PNorm = 184.3429, GNorm = 0.0793, lr_0 = 1.1682e-04
Loss = 1.3631e-02, PNorm = 184.3452, GNorm = 0.1275, lr_0 = 1.1674e-04
Loss = 1.8418e-03, PNorm = 184.3470, GNorm = 0.1504, lr_0 = 1.1666e-04
Loss = 3.6259e-03, PNorm = 184.3485, GNorm = 0.1351, lr_0 = 1.1658e-04
Loss = 2.4855e-03, PNorm = 184.3509, GNorm = 0.0677, lr_0 = 1.1650e-04
Loss = 1.6140e-03, PNorm = 184.3524, GNorm = 0.1558, lr_0 = 1.1642e-04
Loss = 1.9847e-03, PNorm = 184.3533, GNorm = 0.0468, lr_0 = 1.1634e-04
Loss = 3.0466e-03, PNorm = 184.3550, GNorm = 0.0626, lr_0 = 1.1626e-04
Loss = 1.3743e-03, PNorm = 184.3568, GNorm = 0.0569, lr_0 = 1.1618e-04
Loss = 7.6116e-03, PNorm = 184.3570, GNorm = 0.1307, lr_0 = 1.1611e-04
Loss = 5.0150e-03, PNorm = 184.3588, GNorm = 0.0834, lr_0 = 1.1603e-04
Loss = 5.0867e-03, PNorm = 184.3605, GNorm = 0.0976, lr_0 = 1.1595e-04
Loss = 2.4291e-03, PNorm = 184.3609, GNorm = 0.1328, lr_0 = 1.1587e-04
Loss = 3.8804e-03, PNorm = 184.3608, GNorm = 0.3772, lr_0 = 1.1579e-04
Loss = 1.4466e-03, PNorm = 184.3614, GNorm = 0.0719, lr_0 = 1.1571e-04
Loss = 1.8491e-03, PNorm = 184.3615, GNorm = 0.0921, lr_0 = 1.1563e-04
Loss = 4.0946e-03, PNorm = 184.3630, GNorm = 0.0803, lr_0 = 1.1555e-04
Loss = 2.7514e-03, PNorm = 184.3646, GNorm = 0.1343, lr_0 = 1.1547e-04
Loss = 6.2321e-03, PNorm = 184.3638, GNorm = 0.0866, lr_0 = 1.1539e-04
Loss = 1.9061e-03, PNorm = 184.3636, GNorm = 0.2001, lr_0 = 1.1531e-04
Loss = 4.2489e-03, PNorm = 184.3661, GNorm = 0.1727, lr_0 = 1.1523e-04
Loss = 2.2424e-03, PNorm = 184.3666, GNorm = 0.0814, lr_0 = 1.1515e-04
Loss = 1.9154e-03, PNorm = 184.3684, GNorm = 0.0820, lr_0 = 1.1508e-04
Loss = 1.9511e-03, PNorm = 184.3712, GNorm = 0.2746, lr_0 = 1.1500e-04
Loss = 3.1235e-03, PNorm = 184.3724, GNorm = 0.0660, lr_0 = 1.1492e-04
Loss = 2.9567e-03, PNorm = 184.3732, GNorm = 0.0823, lr_0 = 1.1484e-04
Loss = 2.7828e-03, PNorm = 184.3732, GNorm = 0.1391, lr_0 = 1.1476e-04
Loss = 2.5829e-03, PNorm = 184.3739, GNorm = 0.0807, lr_0 = 1.1468e-04
Loss = 5.0339e-03, PNorm = 184.3749, GNorm = 0.1136, lr_0 = 1.1460e-04
Loss = 2.8172e-03, PNorm = 184.3761, GNorm = 0.1089, lr_0 = 1.1452e-04
Loss = 1.1195e-03, PNorm = 184.3772, GNorm = 0.0728, lr_0 = 1.1445e-04
Loss = 1.0616e-03, PNorm = 184.3785, GNorm = 0.0886, lr_0 = 1.1437e-04
Loss = 2.8812e-03, PNorm = 184.3798, GNorm = 0.0776, lr_0 = 1.1429e-04
Loss = 3.0376e-03, PNorm = 184.3798, GNorm = 0.1422, lr_0 = 1.1421e-04
Loss = 1.9509e-03, PNorm = 184.3806, GNorm = 0.1319, lr_0 = 1.1413e-04
Loss = 1.2704e-03, PNorm = 184.3820, GNorm = 0.1671, lr_0 = 1.1405e-04
Loss = 3.7532e-03, PNorm = 184.3831, GNorm = 0.0861, lr_0 = 1.1398e-04
Loss = 2.2985e-03, PNorm = 184.3854, GNorm = 0.0823, lr_0 = 1.1390e-04
Loss = 6.6591e-03, PNorm = 184.3867, GNorm = 0.3843, lr_0 = 1.1382e-04
Loss = 2.3816e-03, PNorm = 184.3888, GNorm = 0.0658, lr_0 = 1.1374e-04
Loss = 3.6297e-03, PNorm = 184.3898, GNorm = 0.0569, lr_0 = 1.1366e-04
Loss = 1.8994e-03, PNorm = 184.3907, GNorm = 0.1336, lr_0 = 1.1359e-04
Loss = 2.1766e-03, PNorm = 184.3922, GNorm = 0.1942, lr_0 = 1.1351e-04
Loss = 2.9494e-03, PNorm = 184.3923, GNorm = 0.0592, lr_0 = 1.1343e-04
Loss = 3.7208e-03, PNorm = 184.3940, GNorm = 0.0777, lr_0 = 1.1335e-04
Loss = 3.7967e-03, PNorm = 184.3968, GNorm = 0.2694, lr_0 = 1.1328e-04
Loss = 4.7402e-03, PNorm = 184.3978, GNorm = 0.2481, lr_0 = 1.1320e-04
Loss = 4.6708e-03, PNorm = 184.3995, GNorm = 0.1515, lr_0 = 1.1312e-04
Loss = 1.4970e-03, PNorm = 184.4009, GNorm = 0.1427, lr_0 = 1.1304e-04
Loss = 6.5865e-03, PNorm = 184.4003, GNorm = 0.0688, lr_0 = 1.1297e-04
Loss = 2.9376e-03, PNorm = 184.4016, GNorm = 0.1054, lr_0 = 1.1289e-04
Loss = 1.3286e-03, PNorm = 184.4032, GNorm = 0.0827, lr_0 = 1.1281e-04
Loss = 1.6018e-03, PNorm = 184.4042, GNorm = 0.1840, lr_0 = 1.1273e-04
Loss = 3.7157e-03, PNorm = 184.4058, GNorm = 0.6305, lr_0 = 1.1266e-04
Loss = 1.2113e-03, PNorm = 184.4074, GNorm = 0.1178, lr_0 = 1.1258e-04
Loss = 2.4244e-03, PNorm = 184.4095, GNorm = 0.0525, lr_0 = 1.1250e-04
Loss = 5.8680e-03, PNorm = 184.4119, GNorm = 0.1341, lr_0 = 1.1243e-04
Loss = 1.4642e-03, PNorm = 184.4138, GNorm = 0.0360, lr_0 = 1.1235e-04
Loss = 1.9823e-03, PNorm = 184.4155, GNorm = 0.1340, lr_0 = 1.1227e-04
Loss = 1.1555e-03, PNorm = 184.4163, GNorm = 0.1210, lr_0 = 1.1219e-04
Loss = 2.6124e-03, PNorm = 184.4168, GNorm = 0.0744, lr_0 = 1.1212e-04
Loss = 3.2742e-03, PNorm = 184.4174, GNorm = 0.0977, lr_0 = 1.1204e-04
Loss = 6.5021e-03, PNorm = 184.4186, GNorm = 0.0481, lr_0 = 1.1196e-04
Loss = 3.6563e-03, PNorm = 184.4200, GNorm = 0.1210, lr_0 = 1.1189e-04
Loss = 1.6286e-03, PNorm = 184.4224, GNorm = 0.0800, lr_0 = 1.1181e-04
Loss = 1.4000e-03, PNorm = 184.4233, GNorm = 0.1589, lr_0 = 1.1173e-04
Loss = 3.7361e-03, PNorm = 184.4235, GNorm = 0.1345, lr_0 = 1.1166e-04
Loss = 3.3220e-03, PNorm = 184.4238, GNorm = 0.1199, lr_0 = 1.1158e-04
Loss = 2.3571e-03, PNorm = 184.4259, GNorm = 0.1172, lr_0 = 1.1150e-04
Loss = 2.2977e-03, PNorm = 184.4277, GNorm = 0.0457, lr_0 = 1.1143e-04
Loss = 6.5661e-03, PNorm = 184.4296, GNorm = 0.0838, lr_0 = 1.1135e-04
Loss = 1.9728e-03, PNorm = 184.4317, GNorm = 0.1082, lr_0 = 1.1128e-04
Loss = 3.3192e-03, PNorm = 184.4355, GNorm = 0.3215, lr_0 = 1.1120e-04
Loss = 2.0459e-03, PNorm = 184.4379, GNorm = 0.0412, lr_0 = 1.1112e-04
Loss = 1.5437e-03, PNorm = 184.4383, GNorm = 0.1269, lr_0 = 1.1105e-04
Loss = 3.5321e-03, PNorm = 184.4389, GNorm = 0.0706, lr_0 = 1.1097e-04
Loss = 1.7219e-03, PNorm = 184.4403, GNorm = 0.0975, lr_0 = 1.1089e-04
Loss = 3.0263e-03, PNorm = 184.4419, GNorm = 0.1004, lr_0 = 1.1082e-04
Loss = 2.3373e-03, PNorm = 184.4436, GNorm = 0.0730, lr_0 = 1.1074e-04
Loss = 6.4721e-03, PNorm = 184.4455, GNorm = 0.0656, lr_0 = 1.1067e-04
Loss = 1.0034e-03, PNorm = 184.4473, GNorm = 0.0422, lr_0 = 1.1059e-04
Loss = 1.1919e-03, PNorm = 184.4475, GNorm = 0.1160, lr_0 = 1.1052e-04
Loss = 2.0393e-03, PNorm = 184.4484, GNorm = 0.0565, lr_0 = 1.1044e-04
Loss = 2.8524e-03, PNorm = 184.4500, GNorm = 0.1151, lr_0 = 1.1036e-04
Loss = 1.0089e-03, PNorm = 184.4502, GNorm = 0.0571, lr_0 = 1.1029e-04
Loss = 3.8189e-03, PNorm = 184.4506, GNorm = 0.2017, lr_0 = 1.1021e-04
Loss = 2.1795e-03, PNorm = 184.4516, GNorm = 0.1482, lr_0 = 1.1014e-04
Loss = 2.6827e-03, PNorm = 184.4528, GNorm = 0.1178, lr_0 = 1.1006e-04
Loss = 2.1282e-03, PNorm = 184.4534, GNorm = 0.1033, lr_0 = 1.0999e-04
Loss = 7.7819e-03, PNorm = 184.4556, GNorm = 0.1186, lr_0 = 1.0991e-04
Loss = 7.0822e-03, PNorm = 184.4588, GNorm = 0.6090, lr_0 = 1.0984e-04
Loss = 1.3486e-03, PNorm = 184.4593, GNorm = 0.2378, lr_0 = 1.0976e-04
Loss = 3.3336e-03, PNorm = 184.4609, GNorm = 0.0649, lr_0 = 1.0969e-04
Loss = 4.9076e-03, PNorm = 184.4632, GNorm = 0.1865, lr_0 = 1.0961e-04
Loss = 1.9069e-03, PNorm = 184.4654, GNorm = 0.1216, lr_0 = 1.0954e-04
Loss = 2.2204e-03, PNorm = 184.4677, GNorm = 0.0806, lr_0 = 1.0946e-04
Loss = 4.4060e-03, PNorm = 184.4703, GNorm = 0.3419, lr_0 = 1.0939e-04
Loss = 2.0615e-03, PNorm = 184.4717, GNorm = 0.1586, lr_0 = 1.0931e-04
Loss = 4.4927e-03, PNorm = 184.4733, GNorm = 0.1240, lr_0 = 1.0924e-04
Loss = 2.7082e-03, PNorm = 184.4747, GNorm = 0.0774, lr_0 = 1.0916e-04
Loss = 5.1617e-03, PNorm = 184.4759, GNorm = 0.0825, lr_0 = 1.0909e-04
Loss = 3.1671e-03, PNorm = 184.4766, GNorm = 0.1160, lr_0 = 1.0901e-04
Loss = 7.0209e-03, PNorm = 184.4777, GNorm = 0.1039, lr_0 = 1.0894e-04
Loss = 1.2026e-03, PNorm = 184.4797, GNorm = 0.0643, lr_0 = 1.0886e-04
Loss = 2.2277e-03, PNorm = 184.4809, GNorm = 0.1689, lr_0 = 1.0879e-04
Loss = 2.2321e-03, PNorm = 184.4809, GNorm = 0.0877, lr_0 = 1.0871e-04
Loss = 1.1038e-03, PNorm = 184.4831, GNorm = 0.0541, lr_0 = 1.0864e-04
Loss = 4.1769e-03, PNorm = 184.4852, GNorm = 0.1122, lr_0 = 1.0856e-04
Validation mae = 0.120996
Epoch 29
Loss = 2.0386e-03, PNorm = 184.4862, GNorm = 0.0646, lr_0 = 1.0849e-04
Loss = 2.1571e-03, PNorm = 184.4877, GNorm = 0.1610, lr_0 = 1.0841e-04
Loss = 1.2437e-03, PNorm = 184.4882, GNorm = 0.0682, lr_0 = 1.0834e-04
Loss = 2.1136e-03, PNorm = 184.4884, GNorm = 0.1034, lr_0 = 1.0827e-04
Loss = 1.5819e-03, PNorm = 184.4890, GNorm = 0.1851, lr_0 = 1.0819e-04
Loss = 3.7655e-03, PNorm = 184.4895, GNorm = 0.1095, lr_0 = 1.0812e-04
Loss = 3.3907e-03, PNorm = 184.4900, GNorm = 0.0527, lr_0 = 1.0804e-04
Loss = 2.8897e-03, PNorm = 184.4913, GNorm = 0.1112, lr_0 = 1.0797e-04
Loss = 1.7427e-03, PNorm = 184.4921, GNorm = 0.0814, lr_0 = 1.0790e-04
Loss = 1.1162e-03, PNorm = 184.4928, GNorm = 0.0485, lr_0 = 1.0782e-04
Loss = 3.1247e-03, PNorm = 184.4937, GNorm = 0.1854, lr_0 = 1.0775e-04
Loss = 1.1082e-03, PNorm = 184.4961, GNorm = 0.0897, lr_0 = 1.0767e-04
Loss = 1.2038e-03, PNorm = 184.4979, GNorm = 0.0734, lr_0 = 1.0760e-04
Loss = 3.8277e-03, PNorm = 184.4991, GNorm = 0.0431, lr_0 = 1.0753e-04
Loss = 1.6051e-03, PNorm = 184.5000, GNorm = 0.0322, lr_0 = 1.0745e-04
Loss = 1.1074e-03, PNorm = 184.5016, GNorm = 0.0482, lr_0 = 1.0738e-04
Loss = 1.8726e-03, PNorm = 184.5028, GNorm = 0.2152, lr_0 = 1.0731e-04
Loss = 9.7233e-04, PNorm = 184.5042, GNorm = 0.0585, lr_0 = 1.0723e-04
Loss = 4.1193e-03, PNorm = 184.5058, GNorm = 0.1333, lr_0 = 1.0716e-04
Loss = 1.5882e-03, PNorm = 184.5076, GNorm = 0.0465, lr_0 = 1.0709e-04
Loss = 2.4423e-03, PNorm = 184.5090, GNorm = 0.0752, lr_0 = 1.0701e-04
Loss = 3.9730e-03, PNorm = 184.5101, GNorm = 0.1000, lr_0 = 1.0694e-04
Loss = 3.3255e-03, PNorm = 184.5121, GNorm = 0.1374, lr_0 = 1.0687e-04
Loss = 2.9743e-03, PNorm = 184.5128, GNorm = 0.0939, lr_0 = 1.0679e-04
Loss = 1.5313e-03, PNorm = 184.5130, GNorm = 0.0779, lr_0 = 1.0672e-04
Loss = 1.9814e-03, PNorm = 184.5139, GNorm = 0.0745, lr_0 = 1.0665e-04
Loss = 6.5319e-03, PNorm = 184.5136, GNorm = 0.1701, lr_0 = 1.0657e-04
Loss = 1.8622e-03, PNorm = 184.5126, GNorm = 0.0769, lr_0 = 1.0650e-04
Loss = 1.5793e-03, PNorm = 184.5137, GNorm = 0.1343, lr_0 = 1.0643e-04
Loss = 2.1202e-03, PNorm = 184.5161, GNorm = 0.1071, lr_0 = 1.0635e-04
Loss = 9.1672e-04, PNorm = 184.5179, GNorm = 0.1072, lr_0 = 1.0628e-04
Loss = 1.2806e-03, PNorm = 184.5179, GNorm = 0.1004, lr_0 = 1.0621e-04
Loss = 2.4656e-03, PNorm = 184.5181, GNorm = 0.0929, lr_0 = 1.0614e-04
Loss = 2.8725e-03, PNorm = 184.5204, GNorm = 0.0934, lr_0 = 1.0606e-04
Loss = 1.1096e-03, PNorm = 184.5213, GNorm = 0.0686, lr_0 = 1.0599e-04
Loss = 9.1408e-04, PNorm = 184.5223, GNorm = 0.0450, lr_0 = 1.0592e-04
Loss = 8.3658e-04, PNorm = 184.5233, GNorm = 0.1025, lr_0 = 1.0585e-04
Loss = 1.4364e-03, PNorm = 184.5238, GNorm = 0.0975, lr_0 = 1.0577e-04
Loss = 6.6090e-03, PNorm = 184.5245, GNorm = 0.1730, lr_0 = 1.0570e-04
Loss = 8.9296e-04, PNorm = 184.5246, GNorm = 0.0431, lr_0 = 1.0563e-04
Loss = 1.0012e-03, PNorm = 184.5249, GNorm = 0.0773, lr_0 = 1.0556e-04
Loss = 3.9712e-03, PNorm = 184.5271, GNorm = 0.1746, lr_0 = 1.0548e-04
Loss = 1.2826e-03, PNorm = 184.5279, GNorm = 0.0837, lr_0 = 1.0541e-04
Loss = 5.0553e-03, PNorm = 184.5287, GNorm = 0.0502, lr_0 = 1.0534e-04
Loss = 9.5014e-04, PNorm = 184.5307, GNorm = 0.1014, lr_0 = 1.0527e-04
Loss = 1.7679e-03, PNorm = 184.5310, GNorm = 0.1474, lr_0 = 1.0519e-04
Loss = 3.4211e-03, PNorm = 184.5308, GNorm = 0.0588, lr_0 = 1.0512e-04
Loss = 1.2628e-03, PNorm = 184.5317, GNorm = 0.1342, lr_0 = 1.0505e-04
Loss = 2.3185e-03, PNorm = 184.5338, GNorm = 0.0847, lr_0 = 1.0498e-04
Loss = 1.1327e-03, PNorm = 184.5347, GNorm = 0.0788, lr_0 = 1.0491e-04
Loss = 4.2132e-03, PNorm = 184.5359, GNorm = 0.0537, lr_0 = 1.0483e-04
Loss = 1.0804e-03, PNorm = 184.5373, GNorm = 0.0350, lr_0 = 1.0476e-04
Loss = 1.5456e-03, PNorm = 184.5390, GNorm = 0.1901, lr_0 = 1.0469e-04
Loss = 4.0901e-03, PNorm = 184.5407, GNorm = 0.0926, lr_0 = 1.0462e-04
Loss = 9.3895e-04, PNorm = 184.5421, GNorm = 0.1033, lr_0 = 1.0455e-04
Loss = 1.3597e-03, PNorm = 184.5428, GNorm = 0.0765, lr_0 = 1.0448e-04
Loss = 2.1855e-03, PNorm = 184.5441, GNorm = 0.0382, lr_0 = 1.0440e-04
Loss = 7.6394e-03, PNorm = 184.5437, GNorm = 0.2249, lr_0 = 1.0433e-04
Loss = 7.4532e-03, PNorm = 184.5458, GNorm = 0.0768, lr_0 = 1.0426e-04
Loss = 2.9607e-03, PNorm = 184.5480, GNorm = 0.0922, lr_0 = 1.0419e-04
Loss = 1.8502e-03, PNorm = 184.5487, GNorm = 0.0996, lr_0 = 1.0412e-04
Loss = 2.9591e-03, PNorm = 184.5499, GNorm = 0.0396, lr_0 = 1.0405e-04
Loss = 2.9221e-03, PNorm = 184.5517, GNorm = 0.1544, lr_0 = 1.0398e-04
Loss = 2.3845e-03, PNorm = 184.5524, GNorm = 0.0564, lr_0 = 1.0391e-04
Loss = 2.3982e-03, PNorm = 184.5540, GNorm = 0.0906, lr_0 = 1.0383e-04
Loss = 1.5613e-03, PNorm = 184.5549, GNorm = 0.0865, lr_0 = 1.0376e-04
Loss = 6.2780e-03, PNorm = 184.5560, GNorm = 0.0708, lr_0 = 1.0369e-04
Loss = 7.3653e-03, PNorm = 184.5562, GNorm = 0.1046, lr_0 = 1.0362e-04
Loss = 4.2323e-03, PNorm = 184.5565, GNorm = 0.0445, lr_0 = 1.0355e-04
Loss = 3.3180e-03, PNorm = 184.5573, GNorm = 0.0757, lr_0 = 1.0348e-04
Loss = 1.4230e-03, PNorm = 184.5580, GNorm = 0.0998, lr_0 = 1.0341e-04
Loss = 9.5289e-04, PNorm = 184.5595, GNorm = 0.0387, lr_0 = 1.0334e-04
Loss = 8.3378e-04, PNorm = 184.5615, GNorm = 0.0620, lr_0 = 1.0327e-04
Loss = 4.4318e-03, PNorm = 184.5630, GNorm = 0.0663, lr_0 = 1.0320e-04
Loss = 1.4549e-03, PNorm = 184.5638, GNorm = 0.1491, lr_0 = 1.0312e-04
Loss = 1.6706e-03, PNorm = 184.5645, GNorm = 0.0687, lr_0 = 1.0305e-04
Loss = 4.5128e-03, PNorm = 184.5642, GNorm = 0.2535, lr_0 = 1.0298e-04
Loss = 1.1653e-03, PNorm = 184.5643, GNorm = 0.0791, lr_0 = 1.0291e-04
Loss = 1.8556e-03, PNorm = 184.5662, GNorm = 0.0783, lr_0 = 1.0284e-04
Loss = 2.4656e-03, PNorm = 184.5677, GNorm = 0.0495, lr_0 = 1.0277e-04
Loss = 6.8954e-03, PNorm = 184.5682, GNorm = 0.1232, lr_0 = 1.0270e-04
Loss = 4.1898e-03, PNorm = 184.5689, GNorm = 0.1324, lr_0 = 1.0263e-04
Loss = 5.7742e-03, PNorm = 184.5704, GNorm = 0.0798, lr_0 = 1.0256e-04
Loss = 6.9225e-03, PNorm = 184.5725, GNorm = 0.0437, lr_0 = 1.0249e-04
Loss = 1.3604e-03, PNorm = 184.5753, GNorm = 0.0766, lr_0 = 1.0242e-04
Loss = 3.1902e-03, PNorm = 184.5785, GNorm = 0.1058, lr_0 = 1.0235e-04
Loss = 3.4239e-03, PNorm = 184.5792, GNorm = 0.0710, lr_0 = 1.0228e-04
Loss = 2.5611e-03, PNorm = 184.5813, GNorm = 0.0682, lr_0 = 1.0221e-04
Loss = 1.8602e-03, PNorm = 184.5830, GNorm = 0.0707, lr_0 = 1.0214e-04
Loss = 1.7597e-03, PNorm = 184.5838, GNorm = 0.5570, lr_0 = 1.0207e-04
Loss = 5.8792e-03, PNorm = 184.5841, GNorm = 0.2304, lr_0 = 1.0200e-04
Loss = 2.5572e-03, PNorm = 184.5867, GNorm = 0.4679, lr_0 = 1.0193e-04
Loss = 1.1967e-03, PNorm = 184.5887, GNorm = 0.0828, lr_0 = 1.0186e-04
Loss = 1.3044e-03, PNorm = 184.5896, GNorm = 0.1901, lr_0 = 1.0179e-04
Loss = 6.9278e-03, PNorm = 184.5913, GNorm = 0.0819, lr_0 = 1.0172e-04
Loss = 1.5789e-03, PNorm = 184.5934, GNorm = 0.1012, lr_0 = 1.0165e-04
Loss = 9.7326e-04, PNorm = 184.5952, GNorm = 0.1069, lr_0 = 1.0158e-04
Loss = 9.9015e-04, PNorm = 184.5964, GNorm = 0.0718, lr_0 = 1.0151e-04
Loss = 3.3803e-03, PNorm = 184.5967, GNorm = 0.0738, lr_0 = 1.0144e-04
Loss = 1.6846e-03, PNorm = 184.5964, GNorm = 0.0778, lr_0 = 1.0137e-04
Loss = 8.0478e-03, PNorm = 184.5970, GNorm = 0.0695, lr_0 = 1.0130e-04
Loss = 4.5557e-03, PNorm = 184.5982, GNorm = 0.0738, lr_0 = 1.0123e-04
Loss = 6.0195e-03, PNorm = 184.5996, GNorm = 0.0796, lr_0 = 1.0116e-04
Loss = 1.5706e-03, PNorm = 184.6006, GNorm = 0.0820, lr_0 = 1.0110e-04
Loss = 4.4231e-03, PNorm = 184.6024, GNorm = 0.1595, lr_0 = 1.0103e-04
Loss = 1.8395e-03, PNorm = 184.6045, GNorm = 0.1063, lr_0 = 1.0096e-04
Loss = 2.1405e-03, PNorm = 184.6051, GNorm = 0.0799, lr_0 = 1.0089e-04
Loss = 8.6755e-04, PNorm = 184.6051, GNorm = 0.0862, lr_0 = 1.0082e-04
Loss = 4.7653e-03, PNorm = 184.6059, GNorm = 0.1104, lr_0 = 1.0075e-04
Loss = 3.0240e-03, PNorm = 184.6062, GNorm = 0.1311, lr_0 = 1.0068e-04
Loss = 1.2289e-03, PNorm = 184.6076, GNorm = 0.0966, lr_0 = 1.0061e-04
Loss = 3.2510e-03, PNorm = 184.6091, GNorm = 0.6037, lr_0 = 1.0054e-04
Loss = 1.3745e-02, PNorm = 184.6119, GNorm = 0.7317, lr_0 = 1.0047e-04
Loss = 3.7967e-03, PNorm = 184.6128, GNorm = 0.4270, lr_0 = 1.0041e-04
Loss = 4.8446e-03, PNorm = 184.6128, GNorm = 0.1168, lr_0 = 1.0034e-04
Loss = 2.1904e-03, PNorm = 184.6136, GNorm = 0.0574, lr_0 = 1.0027e-04
Loss = 3.0471e-03, PNorm = 184.6138, GNorm = 0.2143, lr_0 = 1.0020e-04
Loss = 3.2812e-03, PNorm = 184.6144, GNorm = 0.1576, lr_0 = 1.0013e-04
Loss = 5.7879e-03, PNorm = 184.6158, GNorm = 0.0462, lr_0 = 1.0006e-04
Loss = 3.4929e-03, PNorm = 184.6182, GNorm = 0.2299, lr_0 = 1.0000e-04
Validation mae = 0.120991
Model 0 best validation mae = 0.120738 on epoch 27
Loading pretrained parameter "ffn.1.weight".
Loading pretrained parameter "ffn.1.bias".
Loading pretrained parameter "ffn.4.weight".
Loading pretrained parameter "ffn.4.bias".
Loading pretrained parameter "ffn.7.weight".
Loading pretrained parameter "ffn.7.bias".
Moving model to cuda
Model 0 test mae = 0.119646
Ensemble test mae = 0.119646
10-fold cross validation
	Seed 0 ==> test mae = 0.119489
	Seed 1 ==> test mae = 0.119513
	Seed 2 ==> test mae = 0.119969
	Seed 3 ==> test mae = 0.119205
	Seed 4 ==> test mae = 0.119779
	Seed 5 ==> test mae = 0.119689
	Seed 6 ==> test mae = 0.119483
	Seed 7 ==> test mae = 0.119594
	Seed 8 ==> test mae = 0.119693
	Seed 9 ==> test mae = 0.119646
Overall test mae = 0.119606 +/- 0.000194
Elapsed time = 4:41:06
